1 /*
2  * ****************************************************************************
3  * Copyright (c) 2013-2019, PyInstaller Development Team.
4  * Distributed under the terms of the GNU General Public License with exception
5  * for distributing bootloader.
6  *
7  * The full license is in the file COPYING.txt, distributed with this software.
8  * ****************************************************************************
9  */
10 
11 /*
 * Functions related to PyInstaller archive embedded in executable.
13  */
14 
15 /* TODO: use safe string functions */
16 #define _CRT_SECURE_NO_WARNINGS 1
17 
18 #ifdef _WIN32
19 /* TODO verify windows includes */
20     #include <winsock.h>  /* ntohl */
21 #else
22     #include <limits.h>  /* PATH_MAX - not available on windows. */
23     #if defined(__FreeBSD__) || defined(__DragonFly__)
24 /* freebsd issue #188316 */
25         #include <arpa/inet.h>  /* ntohl */
26     #else
27         #include <netinet/in.h>  /* ntohl */
28     #endif
29     #include <stdlib.h>   /* malloc */
30     #include <string.h>   /* strncmp, strcpy, strcat */
31     #include <sys/stat.h> /* fchmod */
32 #endif /* ifdef _WIN32 */
33 #include <stddef.h>  /* ptrdiff_t */
34 #include <stdio.h>
35 
36 /* PyInstaller headers. */
37 #include "zlib.h"
38 #include "pyi_global.h"
39 #include "pyi_path.h"
40 #include "pyi_archive.h"
41 #include "pyi_utils.h"
42 #include "pyi_python.h"
43 
44 int pyvers = 0;
45 
46 /* Magic number to verify archive data are bundled correctly. */
47 #define MAGIC "MEI\014\013\012\013\016"
48 
49 /*
50  * Return pointer to next toc entry.
51  */
52 TOC *
pyi_arch_increment_toc_ptr(const ARCHIVE_STATUS * status,const TOC * ptoc)53 pyi_arch_increment_toc_ptr(const ARCHIVE_STATUS *status, const TOC* ptoc)
54 {
55     TOC *result = (TOC*)((char *)ptoc + ntohl(ptoc->structlen));
56 
57     if (result < status->tocbuff) {
58         FATALERROR("Cannot read Table of Contents.\n");
59         return status->tocend;
60     }
61     return result;
62 }
63 
64 /*
65  * Open archive file if needed
66  */
67 static int
pyi_arch_open_fp(ARCHIVE_STATUS * status)68 pyi_arch_open_fp(ARCHIVE_STATUS *status)
69 {
70     if (status->fp == NULL) {
71         status->fp = pyi_path_fopen(status->archivename, "rb");
72 
73         if (status->fp == NULL) {
74             return -1;
75         }
76     }
77     return 0;
78 }
79 
80 /*
81  * Close archive file
82  * File should close after unused to avoid locking
83  */
84 static void
pyi_arch_close_fp(ARCHIVE_STATUS * status)85 pyi_arch_close_fp(ARCHIVE_STATUS *status)
86 {
87     if (status->fp != NULL) {
88         pyi_path_fclose(status->fp);
89         status->fp = NULL;
90     }
91 }
92 
93 /*
94  * Decompress data in buff, described by ptoc.
95  * Return in malloc'ed buffer (needs to be freed)
96  */
97 static unsigned char *
decompress(unsigned char * buff,TOC * ptoc)98 decompress(unsigned char * buff, TOC *ptoc)
99 {
100     const char *ver;
101     unsigned char *out;
102     z_stream zstream;
103     int rc;
104 
105     ver = (zlibVersion)();
106     out = (unsigned char *)malloc(ntohl(ptoc->ulen));
107 
108     if (out == NULL) {
109         OTHERERROR("Error allocating decompression buffer\n");
110         return NULL;
111     }
112 
113     zstream.zalloc = NULL;
114     zstream.zfree = NULL;
115     zstream.opaque = NULL;
116     zstream.next_in = buff;
117     zstream.avail_in = ntohl(ptoc->len);
118     zstream.next_out = out;
119     zstream.avail_out = ntohl(ptoc->ulen);
120     rc = inflateInit(&zstream);
121 
122     if (rc >= 0) {
123         rc = (inflate)(&zstream, Z_FINISH);
124 
125         if (rc >= 0) {
126             rc = (inflateEnd)(&zstream);
127         }
128         else {
129             OTHERERROR("Error %d from inflate: %s\n", rc, zstream.msg);
130             return NULL;
131         }
132     }
133     else {
134         OTHERERROR("Error %d from inflateInit: %s\n", rc, zstream.msg);
135         return NULL;
136     }
137 
138     return out;
139 }
140 
141 /*
142  * Extract an archive entry.
143  * Returns pointer to the data (must be freed).
144  */
145 unsigned char *
pyi_arch_extract(ARCHIVE_STATUS * status,TOC * ptoc)146 pyi_arch_extract(ARCHIVE_STATUS *status, TOC *ptoc)
147 {
148     unsigned char *data;
149     unsigned char *tmp;
150 
151     if (pyi_arch_open_fp(status) != 0) {
152         OTHERERROR("Cannot open archive file\n");
153         return NULL;
154     }
155 
156     fseek(status->fp, status->pkgstart + ntohl(ptoc->pos), SEEK_SET);
157     data = (unsigned char *)malloc(ntohl(ptoc->len));
158 
159     if (data == NULL) {
160         OTHERERROR("Could not allocate read buffer\n");
161         return NULL;
162     }
163 
164     if (fread(data, ntohl(ptoc->len), 1, status->fp) < 1) {
165         OTHERERROR("Could not read from file\n");
166         free(data);
167         return NULL;
168     }
169 
170     if (ptoc->cflag == '\1') {
171         tmp = decompress(data, ptoc);
172         free(data);
173         data = tmp;
174 
175         if (data == NULL) {
176             OTHERERROR("Error decompressing %s\n", ptoc->name);
177             return NULL;
178         }
179     }
180 
181     pyi_arch_close_fp(status);
182     return data;
183 }
184 
185 /*
186  * Extract from the archive and copy to the filesystem.
187  * The path is relative to the directory the archive is in.
188  */
189 int
pyi_arch_extract2fs(ARCHIVE_STATUS * status,TOC * ptoc)190 pyi_arch_extract2fs(ARCHIVE_STATUS *status, TOC *ptoc)
191 {
192     FILE *out;
193     size_t result, len;
194     unsigned char *data = pyi_arch_extract(status, ptoc);
195 
196     /* Create tmp dir _MEIPASSxxx. */
197     if (pyi_create_temp_path(status) == -1) {
198         return -1;
199     }
200 
201     out = pyi_open_target(status->temppath, ptoc->name);
202     len = ntohl(ptoc->ulen);
203 
204     if (out == NULL) {
205         FATAL_PERROR("fopen", "%s could not be extracted!\n", ptoc->name);
206         return -1;
207     }
208     else {
209         result = fwrite(data, len, 1, out);
210 
211         if ((1 != result) && (len > 0)) {
212             FATAL_PERROR("fwrite", "Failed to write all bytes for %s\n", ptoc->name);
213             return -1;
214         }
215 #ifndef WIN32
216         fchmod(fileno(out), S_IRUSR | S_IWUSR | S_IXUSR);
217 #endif
218         fclose(out);
219     }
220     free(data);
221 
222     return 0;
223 }
224 
225 /*
226  * Look for the predefined string MAGIC in the embedded data before the given
227  * search end position. If MAGIC is found, copies the entire COOKIE struct into
228  * status->cookie, sets status->pkgstart to the location of the archive and returns 0.
229  * Returns -1 on failure.
230  *
231  * PyInstaller sets this cookie to a constant value. Bootloader
232  * compares it with the expected value. If there is match then
233  * bootloader knows where the data was embedded correctly.
234  *
235  * The search space uses the given sizes because on Windows and OS X, the code signing
236  * will add padding between the end of the COOKIE and the beginning of the signature
237  * to align the signature to a quadword or a page boundary respectively. On Linux,
238  * we use objtool to insert the archive into the bootloader, and objtool will
239  * move the ELF section headers so they follow the cookie, so we need to search backward
240  * past the section headers to find the cookie.
241  */
242 #if defined(WIN32)
243 #define SEARCH_SIZE (8 + sizeof(COOKIE))
244 #else
245 #define SEARCH_SIZE (4096 + sizeof(COOKIE))
246 #endif
247 
248 static int
pyi_arch_find_cookie(ARCHIVE_STATUS * status,int search_end)249 pyi_arch_find_cookie(ARCHIVE_STATUS *status, int search_end)
250 {
251     int search_start = search_end - SEARCH_SIZE;
252     char buf[SEARCH_SIZE];
253     char * search_ptr = buf + SEARCH_SIZE - sizeof(COOKIE);
254 
255     if (fseek(status->fp, search_start, SEEK_SET)) {
256         return -1;
257     }
258 
259     /* Read the entire search space */
260     if (fread(buf, SEARCH_SIZE, 1, status->fp) < 1) {
261         return -1;
262     }
263 
264     /* Search for MAGIC within search space */
265 
266     while(search_ptr >= buf) {
267         if(0 == strncmp(MAGIC, search_ptr, strlen(MAGIC))) {
268             /* MAGIC found - Copy COOKIE to status->cookie */
269             memcpy(&status->cookie, search_ptr, sizeof(COOKIE));
270 
271             /* From the cookie, calculate the archive start */
272             status->pkgstart = search_start + sizeof(COOKIE) + (search_ptr - buf) - ntohl(status->cookie.len);
273 
274             return 0;
275         }
276         search_ptr--;
277     }
278 
279     return -1;
280 }
281 
282 static int
findDigitalSignature(ARCHIVE_STATUS * const status)283 findDigitalSignature(ARCHIVE_STATUS * const status)
284 {
285 #ifdef _WIN32
286     /* There might be a digital signature attached. Let's see. */
287     char buf[2];
288     int offset = 0, signature_offset = 0;
289     fseek(status->fp, 0, SEEK_SET);
290     fread(buf, 1, 2, status->fp);
291 
292     if (!(buf[0] == 'M' && buf[1] == 'Z')) {
293         return -1;
294     }
295     /* Skip MSDOS header */
296     fseek(status->fp, 60, SEEK_SET);
297     /* Read offset to PE header */
298     fread(&offset, 4, 1, status->fp);
299     fseek(status->fp, offset + 24, SEEK_SET);
300     fread(buf, 2, 1, status->fp);
301 
302     if (buf[0] == 0x0b && buf[1] == 0x01) {
303         /* 32 bit binary */
304         signature_offset = 152;
305     }
306     else if (buf[0] == 0x0b && buf[1] == 0x02) {
307         /* 64 bit binary */
308         signature_offset = 168;
309     }
310     else {
311         /* Invalid magic value */
312         VS("LOADER: Could not find a valid magic value (was %x %x).\n",
313            (unsigned int) buf[0], (unsigned int) buf[1]);
314         return -1;
315     }
316 
317     /* Jump to the fields that contain digital signature info */
318     fseek(status->fp, offset + signature_offset, SEEK_SET);
319     fread(&offset, 4, 1, status->fp);
320 
321     if (offset == 0) {
322         return -1;
323     }
324     VS("LOADER: %s contains a digital signature\n", status->archivename);
325     return offset;
326 #elif defined(__APPLE__)
327     /* We inspect the Mach-O header to find a code signature
328      *  https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/MachORuntime/
329      *  1) Determine the length of the header
330      *  2) Read the Mach-O Header to determine how many commands there are
331      *  3) Read through the commands and look for a code signature section (command #29)
332      *  4) If we find a one, return where it starts */
333 
334     uint32_t magic_value;
335     uint32_t header_size;
336 
337     uint32_t load_size;
338     uint32_t cmd;
339     uint32_t cmd_size;
340     uint32_t offset = -1;
341 
342     /* The first 4 bytes determine the header length */
343     fseek(status->fp, 0, SEEK_SET);
344     fread(&magic_value, sizeof(uint32_t), 1, status->fp);
345 
346     if (magic_value == 0xfeedface || magic_value == 0xcefaedfe) {
347         /* 32-bit, so the header size is 28 bytes. */
348         header_size = 28;
349     }
350     else {
351         /* 64-bit, so the header size is 32 bytes. */
352         header_size = 32;
353     }
354 
355     /* Determine the total size of all load commands */
356     fseek(status->fp, 20, SEEK_SET);
357     fread(&load_size, sizeof(uint32_t), 1, status->fp);
358 
359     fseek(status->fp, header_size, SEEK_SET);
360 
361     while (ftell(status->fp) < (header_size + load_size)) {
362         fread(&cmd, sizeof(uint32_t), 1, status->fp);
363         fread(&cmd_size, sizeof(uint32_t), 1, status->fp);
364 
365         if (cmd == 29) {
366             /* Code signatures are command 29.
367              *  Our archive ends right before the signature */
368             fread(&offset, sizeof(uint32_t), 1, status->fp);
369             VS("LOADER: %s contains a digital signature\n", status->archivename);
370             break;
371         }
372         fseek(status->fp, cmd_size - 8, SEEK_CUR);
373     }
374     return offset;
375 #else /* ifdef _WIN32 */
376     return -1;
377 #endif /* ifdef _WIN32 */
378 }
379 
380 /*
381  * Open the archive.
382  * Sets f_archiveFile, f_pkgstart, f_tocbuff and f_cookie.
383  */
384 int
pyi_arch_open(ARCHIVE_STATUS * status)385 pyi_arch_open(ARCHIVE_STATUS *status)
386 {
387     int search_end = 0;
388     VS("LOADER: archivename is %s\n", status->archivename);
389 
390     /* Physically open the file */
391     if (pyi_arch_open_fp(status) != 0) {
392         VS("LOADER: Cannot open archive: %s\n", status->archivename);
393         return -1;
394     }
395 
396     /* Find out where to stop searching for the cookie. First try to find
397      * a digital signature added by a code signing tool.
398      */
399 #if defined(WIN32) || defined(__APPLE__)
400     search_end = findDigitalSignature(status);
401 #endif
402 
403     /* Signature not found or not applicable for this platform. Stop searching
404      * at end of file.
405      */
406     if (search_end < 1) {
407         fseek(status->fp, 0, SEEK_END);
408         search_end = ftell(status->fp);
409     }
410 
411     /* Load status->cookie */
412     if (-1 == pyi_arch_find_cookie(status, search_end)) {
413         VS("Loader: Cannot find cookie");
414         return -1;
415     }
416 
417     /* Set the flag that Python library was not loaded yet. */
418     status->is_pylib_loaded = false;
419 
420     /* Set the the Python version used. */
421     pyvers = pyi_arch_get_pyversion(status);
422 
423     /* Read in in the table of contents */
424     fseek(status->fp, status->pkgstart + ntohl(status->cookie.TOC), SEEK_SET);
425     status->tocbuff = (TOC *) malloc(ntohl(status->cookie.TOClen));
426 
427     if (status->tocbuff == NULL) {
428         FATAL_PERROR("malloc", "Could not allocate buffer for TOC.");
429         return -1;
430     }
431 
432     if (fread(status->tocbuff, ntohl(status->cookie.TOClen), 1, status->fp) < 1) {
433         FATAL_PERROR("fread", "Could not read from file.");
434         return -1;
435     }
436     status->tocend = (TOC *) (((char *)status->tocbuff) + ntohl(status->cookie.TOClen));
437 
438     /* Check input file is still ok (should be). */
439     if (ferror(status->fp)) {
440         FATALERROR("Error on file\n.");
441         return -1;
442     }
443 
444     /* Close file handler
445      * if file not close here it will be close in pyi_arch_status_free_memory */
446     pyi_arch_close_fp(status);
447     return 0;
448 }
449 
450 /*
451  * Set up paths required by rest of this module.
452  * Sets f_archivename, f_homepath, f_mainpath
453  */
454 int
pyi_arch_set_paths(ARCHIVE_STATUS * status,char const * archivePath,char const * archiveName)455 pyi_arch_set_paths(ARCHIVE_STATUS *status, char const * archivePath,
456                    char const * archiveName)
457 {
458     size_t pathlen, namelen;
459 
460     pathlen = strnlen(archivePath, PATH_MAX);
461     namelen = strnlen(archiveName, PATH_MAX);
462 
463     if (pathlen+namelen+1 > PATH_MAX) {
464         return -1;
465     }
466 
467     /* Get the archive Path */
468     strcpy(status->archivename, archivePath);
469     strcat(status->archivename, archiveName);
470 
471     /* Set homepath to where the archive is */
472     strcpy(status->homepath, archivePath);
473 
474     /*
475      * Initial value of mainpath is homepath. It might be overriden
476      * by temppath if it is available.
477      */
478     status->has_temp_directory = false;
479     strcpy(status->mainpath, status->homepath);
480 
481     return 0;
482 }
483 
484 /* Setup the archive with python modules. (this always needs to be done) */
485 int
pyi_arch_setup(ARCHIVE_STATUS * status,char const * archivePath,char const * archiveName)486 pyi_arch_setup(ARCHIVE_STATUS *status, char const * archivePath, char const * archiveName)
487 {
488     /* Set up paths */
489     if (pyi_arch_set_paths(status, archivePath, archiveName)) {
490         return -1;
491     }
492 
493     /* Open the archive */
494     if (pyi_arch_open(status)) {
495         /* If this is not an archive, we MUST close the file, */
496         /* otherwise the open file-handle will be reused when */
497         /* testing the next file. */
498         pyi_arch_close_fp(status);
499         return -1;
500     }
501     ;
502     return 0;
503 }
504 
505 /*
506  * external API for iterating TOCs
507  */
508 TOC *
getFirstTocEntry(ARCHIVE_STATUS * status)509 getFirstTocEntry(ARCHIVE_STATUS *status)
510 {
511     return status->tocbuff;
512 }
513 TOC *
getNextTocEntry(ARCHIVE_STATUS * status,TOC * entry)514 getNextTocEntry(ARCHIVE_STATUS *status, TOC *entry)
515 {
516     TOC *rslt = (TOC*)((char *)entry + ntohl(entry->structlen));
517 
518     if (rslt >= status->tocend) {
519         return NULL;
520     }
521     return rslt;
522 }
523 
524 /*
525  * Helpers for embedders.
526  */
527 int
pyi_arch_get_pyversion(ARCHIVE_STATUS * status)528 pyi_arch_get_pyversion(ARCHIVE_STATUS *status)
529 {
530     return ntohl(status->cookie.pyvers);
531 }
532 
533 /*
534  * Free memory allocated for archive status.
535  */
536 void
pyi_arch_status_free_memory(ARCHIVE_STATUS * archive_status)537 pyi_arch_status_free_memory(ARCHIVE_STATUS *archive_status)
538 {
539     if (archive_status != NULL) {
540         VS("LOADER: Freeing archive status for %s\n", archive_status->archivename);
541 
542         /* Free the TOC memory from the archive status first. */
543         if (archive_status->tocbuff != NULL) {
544             free(archive_status->tocbuff);
545         }
546         /* Close file handler */
547         pyi_arch_close_fp(archive_status);
548         free(archive_status);
549     }
550 }
551 
552 /*
553  * Returns the value of the pyi bootloader option given by optname. Returns
554  * NULL if the option is not present. Returns an empty string if the option is present,
555  * but has no associated value.
556  *
557  * The string returned is owned by the ARCHIVE_STATUS; the caller is NOT responsible
558  * for freeing it.
559  */
560 char *
pyi_arch_get_option(const ARCHIVE_STATUS * status,char * optname)561 pyi_arch_get_option(const ARCHIVE_STATUS * status, char * optname)
562 {
563     /* TODO: option-cache? */
564     int optlen;
565     TOC *ptoc = status->tocbuff;
566 
567     optlen = strlen(optname);
568 
569     for (; ptoc < status->tocend; ptoc = pyi_arch_increment_toc_ptr(status, ptoc)) {
570         if (ptoc->typcd == ARCHIVE_ITEM_RUNTIME_OPTION) {
571             if (0 == strncmp(ptoc->name, optname, optlen)) {
572                 if (0 != ptoc->name[optlen]) {
573                     /* Space separates option name from option value, so add 1. */
574                     return ptoc->name + optlen + 1;
575                 }
576                 else {
577                     /* No option value, just return the empty string. */
578                     return ptoc->name + optlen;
579                 }
580 
581             }
582         }
583     }
584     return NULL;
585 }
586