1 /* Module that wraps all OpenSSL hash algorithms */
2 
3 /*
4  * Copyright (C) 2005-2010   Gregory P. Smith (greg@krypto.org)
5  * Licensed to PSF under a Contributor Agreement.
6  *
7  * Derived from a skeleton of shamodule.c containing work performed by:
8  *
9  * Andrew Kuchling (amk@amk.ca)
10  * Greg Stein (gstein@lyra.org)
11  *
12  */
13 
14 #define PY_SSIZE_T_CLEAN
15 
16 #include "Python.h"
17 #include "structmember.h"
18 
#ifdef WITH_THREAD
#include "pythread.h"
    /*
     * ENTER_HASHLIB / LEAVE_HASHLIB bracket accesses to an object's OpenSSL
     * context.  Both do nothing while (obj)->lock is NULL.
     *
     * ENTER_HASHLIB first tries a non-blocking acquire; only when that fails
     * does it release the GIL and block until the lock becomes available.
     *
     * Each macro is wrapped in do { ... } while (0) so it expands to exactly
     * one statement and remains safe inside an unbraced if/else body.
     */
    #define ENTER_HASHLIB(obj) \
        do { \
            if ((obj)->lock) { \
                if (!PyThread_acquire_lock((obj)->lock, 0)) { \
                    Py_BEGIN_ALLOW_THREADS \
                    PyThread_acquire_lock((obj)->lock, 1); \
                    Py_END_ALLOW_THREADS \
                } \
            } \
        } while (0)
    #define LEAVE_HASHLIB(obj) \
        do { \
            if ((obj)->lock) { \
                PyThread_release_lock((obj)->lock); \
            } \
        } while (0)
#else
    /* Built without thread support: there is no lock to take or release. */
    #define ENTER_HASHLIB(obj) do { } while (0)
    #define LEAVE_HASHLIB(obj) do { } while (0)
#endif
37 
38 /* EVP is the preferred interface to hashing in OpenSSL */
39 #include <openssl/evp.h>
40 
/* Upper bound on the number of bytes passed to one EVP_DigestUpdate() call. */
#define MUNCH_SIZE INT_MAX

/* TODO(gps): We should probably make this a module or EVPobject attribute
 * to allow the user to optimize based on the platform they're using. */
/* Inputs of at least this many bytes are hashed with the GIL released. */
#define HASHLIB_GIL_MINSIZE 2048

/* When non-zero, the HASH type gets a tp_init and is added to the module. */
#if !defined(HASH_OBJ_CONSTRUCTOR)
#define HASH_OBJ_CONSTRUCTOR 0
#endif

/* Minimum OpenSSL version needed to support sha224 and higher. */
#if defined(OPENSSL_VERSION_NUMBER) && (OPENSSL_VERSION_NUMBER >= 0x00908000)
#define _OPENSSL_SUPPORTS_SHA2
#endif
55 
/* Instance layout of a _hashlib.HASH object. */
typedef struct {
    PyObject_HEAD
    PyObject            *name;  /* name of this hash algorithm (owned reference) */
    EVP_MD_CTX          ctx;    /* OpenSSL message digest context, stored inline */
#ifdef WITH_THREAD
    PyThread_type_lock  lock;   /* OpenSSL context lock; NULL until update() allocates it */
#endif
} EVPobject;


/* Forward declaration; the type object itself is defined further down. */
static PyTypeObject EVPtype;
67 
68 
69 #define DEFINE_CONSTS_FOR_NEW(Name)  \
70     static PyObject *CONST_ ## Name ## _name_obj; \
71     static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \
72     static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL;
73 
74 DEFINE_CONSTS_FOR_NEW(md5)
DEFINE_CONSTS_FOR_NEW(sha1)75 DEFINE_CONSTS_FOR_NEW(sha1)
76 #ifdef _OPENSSL_SUPPORTS_SHA2
77 DEFINE_CONSTS_FOR_NEW(sha224)
78 DEFINE_CONSTS_FOR_NEW(sha256)
79 DEFINE_CONSTS_FOR_NEW(sha384)
80 DEFINE_CONSTS_FOR_NEW(sha512)
81 #endif
82 
83 
84 static EVPobject *
85 newEVPobject(PyObject *name)
86 {
87     EVPobject *retval = (EVPobject *)PyObject_New(EVPobject, &EVPtype);
88 
89     /* save the name for .name to return */
90     if (retval != NULL) {
91         Py_INCREF(name);
92         retval->name = name;
93 #ifdef WITH_THREAD
94         retval->lock = NULL;
95 #endif
96     }
97 
98     return retval;
99 }
100 
101 static void
EVP_hash(EVPobject * self,const void * vp,Py_ssize_t len)102 EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len)
103 {
104     unsigned int process;
105     const unsigned char *cp = (const unsigned char *)vp;
106     while (0 < len)
107     {
108         if (len > (Py_ssize_t)MUNCH_SIZE)
109             process = MUNCH_SIZE;
110         else
111             process = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int);
112         EVP_DigestUpdate(&self->ctx, (const void*)cp, process);
113         len -= process;
114         cp += process;
115     }
116 }
117 
118 /* Internal methods for a hash object */
119 
120 static void
EVP_dealloc(EVPobject * self)121 EVP_dealloc(EVPobject *self)
122 {
123 #ifdef WITH_THREAD
124     if (self->lock != NULL)
125         PyThread_free_lock(self->lock);
126 #endif
127     EVP_MD_CTX_cleanup(&self->ctx);
128     Py_XDECREF(self->name);
129     PyObject_Del(self);
130 }
131 
/*
 * Copy self's digest context into *new_ctx_p, bracketing the copy with
 * ENTER_HASHLIB/LEAVE_HASHLIB on self.
 */
static void
locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
{
    const EVP_MD_CTX *source = &self->ctx;

    ENTER_HASHLIB(self);
    EVP_MD_CTX_copy(new_ctx_p, source);
    LEAVE_HASHLIB(self);
}
138 
139 /* External methods for a hash object */
140 
141 PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
142 
143 
144 static PyObject *
EVP_copy(EVPobject * self,PyObject * unused)145 EVP_copy(EVPobject *self, PyObject *unused)
146 {
147     EVPobject *newobj;
148 
149     if ( (newobj = newEVPobject(self->name))==NULL)
150         return NULL;
151 
152     locked_EVP_MD_CTX_copy(&newobj->ctx, self);
153     return (PyObject *)newobj;
154 }
155 
156 PyDoc_STRVAR(EVP_digest__doc__,
157 "Return the digest value as a string of binary data.");
158 
159 static PyObject *
EVP_digest(EVPobject * self,PyObject * unused)160 EVP_digest(EVPobject *self, PyObject *unused)
161 {
162     unsigned char digest[EVP_MAX_MD_SIZE];
163     EVP_MD_CTX temp_ctx;
164     PyObject *retval;
165     unsigned int digest_size;
166 
167     locked_EVP_MD_CTX_copy(&temp_ctx, self);
168     digest_size = EVP_MD_CTX_size(&temp_ctx);
169     EVP_DigestFinal(&temp_ctx, digest, NULL);
170 
171     retval = PyString_FromStringAndSize((const char *)digest, digest_size);
172     EVP_MD_CTX_cleanup(&temp_ctx);
173     return retval;
174 }
175 
176 PyDoc_STRVAR(EVP_hexdigest__doc__,
177 "Return the digest value as a string of hexadecimal digits.");
178 
179 static PyObject *
EVP_hexdigest(EVPobject * self,PyObject * unused)180 EVP_hexdigest(EVPobject *self, PyObject *unused)
181 {
182     unsigned char digest[EVP_MAX_MD_SIZE];
183     EVP_MD_CTX temp_ctx;
184     PyObject *retval;
185     char *hex_digest;
186     unsigned int i, j, digest_size;
187 
188     /* Get the raw (binary) digest value */
189     locked_EVP_MD_CTX_copy(&temp_ctx, self);
190     digest_size = EVP_MD_CTX_size(&temp_ctx);
191     EVP_DigestFinal(&temp_ctx, digest, NULL);
192 
193     EVP_MD_CTX_cleanup(&temp_ctx);
194 
195     /* Create a new string */
196     /* NOTE: not thread safe! modifying an already created string object */
197     /* (not a problem because we hold the GIL by default) */
198     retval = PyString_FromStringAndSize(NULL, digest_size * 2);
199     if (!retval)
200             return NULL;
201     hex_digest = PyString_AsString(retval);
202     if (!hex_digest) {
203             Py_DECREF(retval);
204             return NULL;
205     }
206 
207     /* Make hex version of the digest */
208     for(i=j=0; i<digest_size; i++) {
209         char c;
210         c = (digest[i] >> 4) & 0xf;
211         c = (c>9) ? c+'a'-10 : c + '0';
212         hex_digest[j++] = c;
213         c = (digest[i] & 0xf);
214         c = (c>9) ? c+'a'-10 : c + '0';
215         hex_digest[j++] = c;
216     }
217     return retval;
218 }
219 
220 PyDoc_STRVAR(EVP_update__doc__,
221 "Update this hash object's state with the provided string.");
222 
223 static PyObject *
EVP_update(EVPobject * self,PyObject * args)224 EVP_update(EVPobject *self, PyObject *args)
225 {
226     Py_buffer view;
227 
228     if (!PyArg_ParseTuple(args, "s*:update", &view))
229         return NULL;
230 
231 #ifdef WITH_THREAD
232     if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) {
233         self->lock = PyThread_allocate_lock();
234         /* fail? lock = NULL and we fail over to non-threaded code. */
235     }
236 
237     if (self->lock != NULL) {
238         Py_BEGIN_ALLOW_THREADS
239         PyThread_acquire_lock(self->lock, 1);
240         EVP_hash(self, view.buf, view.len);
241         PyThread_release_lock(self->lock);
242         Py_END_ALLOW_THREADS
243     }
244     else
245 #endif
246     {
247         EVP_hash(self, view.buf, view.len);
248     }
249 
250     PyBuffer_Release(&view);
251 
252     Py_RETURN_NONE;
253 }
254 
/* Method table for HASH objects; referenced by EVPtype as tp_methods. */
static PyMethodDef EVP_methods[] = {
    {"update",    (PyCFunction)EVP_update,    METH_VARARGS, EVP_update__doc__},
    {"digest",    (PyCFunction)EVP_digest,    METH_NOARGS,  EVP_digest__doc__},
    {"hexdigest", (PyCFunction)EVP_hexdigest, METH_NOARGS,  EVP_hexdigest__doc__},
    {"copy",      (PyCFunction)EVP_copy,      METH_NOARGS,  EVP_copy__doc__},
    {NULL,        NULL}         /* sentinel */
};
262 
263 static PyObject *
EVP_get_block_size(EVPobject * self,void * closure)264 EVP_get_block_size(EVPobject *self, void *closure)
265 {
266     long block_size;
267     block_size = EVP_MD_CTX_block_size(&self->ctx);
268     return PyLong_FromLong(block_size);
269 }
270 
271 static PyObject *
EVP_get_digest_size(EVPobject * self,void * closure)272 EVP_get_digest_size(EVPobject *self, void *closure)
273 {
274     long size;
275     size = EVP_MD_CTX_size(&self->ctx);
276     return PyLong_FromLong(size);
277 }
278 
/* Member table for HASH objects: exposes the stored name as read-only. */
static PyMemberDef EVP_members[] = {
    {"name", T_OBJECT, offsetof(EVPobject, name), READONLY, PyDoc_STR("algorithm name.")},
    {NULL}  /* Sentinel */
};
283 
/*
 * Computed attributes of HASH objects.  Every entry has a getter only (the
 * setter slot is NULL) and carries no docstring or closure.
 */
static PyGetSetDef EVP_getseters[] = {
    {"digest_size",
     (getter)EVP_get_digest_size, NULL,
     NULL,
     NULL},
    {"block_size",
     (getter)EVP_get_block_size, NULL,
     NULL,
     NULL},
    /* the old md5 and sha modules support 'digest_size' as in PEP 247.
     * the old sha module also supported 'digestsize'.  ugh. */
    {"digestsize",
     (getter)EVP_get_digest_size, NULL,
     NULL,
     NULL},
    {NULL}  /* Sentinel */
};
301 
302 
303 static PyObject *
EVP_repr(PyObject * self)304 EVP_repr(PyObject *self)
305 {
306     char buf[100];
307     PyOS_snprintf(buf, sizeof(buf), "<%s HASH object @ %p>",
308             PyString_AsString(((EVPobject *)self)->name), self);
309     return PyString_FromString(buf);
310 }
311 
#if HASH_OBJ_CONSTRUCTOR
/*
 * tp_init for HASH objects (compiled only when HASH_OBJ_CONSTRUCTOR is
 * non-zero): HASH(name[, string]).
 *
 * Looks up the digest called `name`, initializes self's context with it,
 * stores `name` as self->name and, when `string` was supplied, hashes it.
 * Inputs of at least HASHLIB_GIL_MINSIZE bytes are hashed with the GIL
 * released.
 *
 * Returns 0 on success.  Returns -1 with an exception set when the
 * arguments cannot be parsed, when `name` is not a string (TypeError), or
 * when OpenSSL does not know the digest (ValueError).
 */
static int
EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"name", "string", NULL};
    PyObject *name_obj = NULL;
    PyObject *old_name;
    Py_buffer view = { 0 };
    char *nameStr;
    const EVP_MD *digest;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s*:HASH", kwlist,
                                     &name_obj, &view)) {
        return -1;
    }

    if (!PyArg_Parse(name_obj, "s", &nameStr)) {
        PyErr_SetString(PyExc_TypeError, "name must be a string");
        PyBuffer_Release(&view);
        return -1;
    }

    digest = EVP_get_digestbyname(nameStr);
    if (!digest) {
        PyErr_SetString(PyExc_ValueError, "unknown hash function");
        PyBuffer_Release(&view);
        return -1;
    }
    EVP_DigestInit(&self->ctx, digest);

    /* Install the new name before dropping the old one, so that calling
     * __init__ again on an initialized object does not leak the previous
     * name reference.
     * NOTE(review): relies on self->name being NULL or a valid reference on
     * entry -- confirm for every way instances of this type get allocated. */
    old_name = self->name;
    Py_INCREF(name_obj);
    self->name = name_obj;
    Py_XDECREF(old_name);

    /* view.obj is only set when the optional string argument was given. */
    if (view.obj) {
        if (view.len >= HASHLIB_GIL_MINSIZE) {
            Py_BEGIN_ALLOW_THREADS
            EVP_hash(self, view.buf, view.len);
            Py_END_ALLOW_THREADS
        } else {
            EVP_hash(self, view.buf, view.len);
        }
        PyBuffer_Release(&view);
    }

    return 0;
}
#endif
358 
359 
360 PyDoc_STRVAR(hashtype_doc,
361 "A hash represents the object used to calculate a checksum of a\n\
362 string of information.\n\
363 \n\
364 Methods:\n\
365 \n\
366 update() -- updates the current digest with an additional string\n\
367 digest() -- return the current digest value\n\
368 hexdigest() -- return the current digest as a string of hexadecimal digits\n\
369 copy() -- return a copy of the current hash object\n\
370 \n\
371 Attributes:\n\
372 \n\
373 name -- the hash algorithm being used by this object\n\
374 digest_size -- number of bytes in this hashes output\n");
375 
/*
 * Type object for _hashlib.HASH.
 *
 * The initializer is positional, so slot order matters.  Slots that are not
 * listed at the end are zero-initialized.
 */
static PyTypeObject EVPtype = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_hashlib.HASH",    /*tp_name*/
    sizeof(EVPobject),  /*tp_basicsize*/
    0,                  /*tp_itemsize*/
    /* methods */
    (destructor)EVP_dealloc,    /*tp_dealloc*/
    0,                  /*tp_print*/
    0,                  /*tp_getattr*/
    0,                  /*tp_setattr*/
    0,                  /*tp_compare*/
    EVP_repr,           /*tp_repr*/
    0,                  /*tp_as_number*/
    0,                  /*tp_as_sequence*/
    0,                  /*tp_as_mapping*/
    0,                  /*tp_hash*/
    0,                  /*tp_call*/
    0,                  /*tp_str*/
    0,                  /*tp_getattro*/
    0,                  /*tp_setattro*/
    0,                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
    hashtype_doc,       /*tp_doc*/
    0,                  /*tp_traverse*/
    0,                  /*tp_clear*/
    0,                  /*tp_richcompare*/
    0,                  /*tp_weaklistoffset*/
    0,                  /*tp_iter*/
    0,                  /*tp_iternext*/
    EVP_methods,        /* tp_methods */
    EVP_members,        /* tp_members */
    EVP_getseters,      /* tp_getset */
    /* NOTE(review): this guard is always true; the slots below are always
     * present in the initializer. */
#if 1
    0,                  /* tp_base */
    0,                  /* tp_dict */
    0,                  /* tp_descr_get */
    0,                  /* tp_descr_set */
    0,                  /* tp_dictoffset */
#endif
    /* tp_init is only supplied when the HASH constructor is compiled in. */
#if HASH_OBJ_CONSTRUCTOR
    (initproc)EVP_tp_init, /* tp_init */
#endif
};
419 
420 static PyObject *
EVPnew(PyObject * name_obj,const EVP_MD * digest,const EVP_MD_CTX * initial_ctx,const unsigned char * cp,Py_ssize_t len)421 EVPnew(PyObject *name_obj,
422        const EVP_MD *digest, const EVP_MD_CTX *initial_ctx,
423        const unsigned char *cp, Py_ssize_t len)
424 {
425     EVPobject *self;
426 
427     if (!digest && !initial_ctx) {
428         PyErr_SetString(PyExc_ValueError, "unsupported hash type");
429         return NULL;
430     }
431 
432     if ((self = newEVPobject(name_obj)) == NULL)
433         return NULL;
434 
435     if (initial_ctx) {
436         EVP_MD_CTX_copy(&self->ctx, initial_ctx);
437     } else {
438         EVP_DigestInit(&self->ctx, digest);
439     }
440 
441     if (cp && len) {
442         if (len >= HASHLIB_GIL_MINSIZE) {
443             Py_BEGIN_ALLOW_THREADS
444             EVP_hash(self, cp, len);
445             Py_END_ALLOW_THREADS
446         } else {
447             EVP_hash(self, cp, len);
448         }
449     }
450 
451     return (PyObject *)self;
452 }
453 
454 
455 /* The module-level function: new() */
456 
457 PyDoc_STRVAR(EVP_new__doc__,
458 "Return a new hash object using the named algorithm.\n\
459 An optional string argument may be provided and will be\n\
460 automatically hashed.\n\
461 \n\
462 The MD5 and SHA1 algorithms are always supported.\n");
463 
464 static PyObject *
EVP_new(PyObject * self,PyObject * args,PyObject * kwdict)465 EVP_new(PyObject *self, PyObject *args, PyObject *kwdict)
466 {
467     static char *kwlist[] = {"name", "string", NULL};
468     PyObject *name_obj = NULL;
469     Py_buffer view = { 0 };
470     PyObject *ret_obj;
471     char *name;
472     const EVP_MD *digest;
473 
474     if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|s*:new", kwlist,
475                                      &name_obj, &view)) {
476         return NULL;
477     }
478 
479     if (!PyArg_Parse(name_obj, "s", &name)) {
480         PyErr_SetString(PyExc_TypeError, "name must be a string");
481         return NULL;
482     }
483 
484     digest = EVP_get_digestbyname(name);
485 
486     ret_obj = EVPnew(name_obj, digest, NULL, (unsigned char*)view.buf,
487                      view.len);
488     PyBuffer_Release(&view);
489 
490     return ret_obj;
491 }
492 
/*
 *  This macro generates constructor function definitions for specific
 *  hash algorithms.  These constructors are much faster than calling
 *  the generic one with a Python string, and noticeably faster than
 *  calling a Python new() wrapper.  That's important for code that
 *  wants to make hashes of a bunch of small strings.
 *
 *  Each generated EVP_new_<NAME>(self, args) accepts one optional buffer
 *  argument and passes the algorithm's name object and context pointer
 *  to EVPnew().
 */
#define GEN_CONSTRUCTOR(NAME)  \
    static PyObject * \
    EVP_new_ ## NAME (PyObject *self, PyObject *args) \
    { \
        Py_buffer data = { 0 }; \
        PyObject *hashobj; \
     \
        if (!PyArg_ParseTuple(args, "|s*:" #NAME , &data)) \
            return NULL; \
     \
        hashobj = EVPnew(CONST_ ## NAME ## _name_obj, \
                         NULL, \
                         CONST_new_ ## NAME ## _ctx_p, \
                         (unsigned char*)data.buf, data.len); \
        PyBuffer_Release(&data); \
        return hashobj; \
    }
519 
/* a PyMethodDef structure for the constructor: exposes EVP_new_<NAME> to
 * Python as "openssl_<NAME>", taking positional arguments only */
#define CONSTRUCTOR_METH_DEF(NAME)  \
    {"openssl_" #NAME, (PyCFunction)EVP_new_ ## NAME, METH_VARARGS, \
        PyDoc_STR("Returns a " #NAME \
                  " hash object; optionally initialized with a string") \
    }
526 
/*
 * Used in the init function to set up one constructor's constants:
 * creates the algorithm's name object and, when OpenSSL provides the
 * digest, initializes the template context and points
 * CONST_new_<NAME>_ctx_p at it.  When the digest is missing the pointer is
 * left as it was.
 *
 * The digest is looked up once.  The expansion deliberately has no trailing
 * semicolon so that the macro behaves as a single statement; the caller
 * writes the ';'.
 */
#define INIT_CONSTRUCTOR_CONSTANTS(NAME)  do { \
    const EVP_MD *md = EVP_get_digestbyname(#NAME); \
    CONST_ ## NAME ## _name_obj = PyString_FromString(#NAME); \
    if (md != NULL) { \
        CONST_new_ ## NAME ## _ctx_p = &CONST_new_ ## NAME ## _ctx; \
        EVP_DigestInit(CONST_new_ ## NAME ## _ctx_p, md); \
    } \
} while (0)
535 
/* Instantiate one EVP_new_<name>() constructor per algorithm; the SHA-2
 * family is only generated when _OPENSSL_SUPPORTS_SHA2 is defined. */
GEN_CONSTRUCTOR(md5)
GEN_CONSTRUCTOR(sha1)
#ifdef _OPENSSL_SUPPORTS_SHA2
GEN_CONSTRUCTOR(sha224)
GEN_CONSTRUCTOR(sha256)
GEN_CONSTRUCTOR(sha384)
GEN_CONSTRUCTOR(sha512)
#endif
544 
/* List of functions exported by this module: the generic new() plus one
 * openssl_<name>() constructor per algorithm. */

static struct PyMethodDef EVP_functions[] = {
    {"new", (PyCFunction)EVP_new, METH_VARARGS|METH_KEYWORDS, EVP_new__doc__},
    CONSTRUCTOR_METH_DEF(md5),
    CONSTRUCTOR_METH_DEF(sha1),
#ifdef _OPENSSL_SUPPORTS_SHA2
    CONSTRUCTOR_METH_DEF(sha224),
    CONSTRUCTOR_METH_DEF(sha256),
    CONSTRUCTOR_METH_DEF(sha384),
    CONSTRUCTOR_METH_DEF(sha512),
#endif
    {NULL,      NULL}            /* Sentinel */
};
559 
560 
561 /* Initialize this module. */
562 
563 PyMODINIT_FUNC
init_hashlib(void)564 init_hashlib(void)
565 {
566     PyObject *m;
567 
568     OpenSSL_add_all_digests();
569 
570     /* TODO build EVP_functions openssl_* entries dynamically based
571      * on what hashes are supported rather than listing many
572      * but having some be unsupported.  Only init appropriate
573      * constants. */
574 
575     Py_TYPE(&EVPtype) = &PyType_Type;
576     if (PyType_Ready(&EVPtype) < 0)
577         return;
578 
579     m = Py_InitModule("_hashlib", EVP_functions);
580     if (m == NULL)
581         return;
582 
583 #if HASH_OBJ_CONSTRUCTOR
584     Py_INCREF(&EVPtype);
585     PyModule_AddObject(m, "HASH", (PyObject *)&EVPtype);
586 #endif
587 
588     /* these constants are used by the convenience constructors */
589     INIT_CONSTRUCTOR_CONSTANTS(md5);
590     INIT_CONSTRUCTOR_CONSTANTS(sha1);
591 #ifdef _OPENSSL_SUPPORTS_SHA2
592     INIT_CONSTRUCTOR_CONSTANTS(sha224);
593     INIT_CONSTRUCTOR_CONSTANTS(sha256);
594     INIT_CONSTRUCTOR_CONSTANTS(sha384);
595     INIT_CONSTRUCTOR_CONSTANTS(sha512);
596 #endif
597 }
598