1 /*
2 * ****************************************************************************
3 * Copyright (c) 2013-2019, PyInstaller Development Team.
4 * Distributed under the terms of the GNU General Public License with exception
5 * for distributing bootloader.
6 *
7 * The full license is in the file COPYING.txt, distributed with this software.
8 * ****************************************************************************
9 */
10
/*
 * Functions related to the PyInstaller archive embedded in the executable.
 */
14
15 /* TODO: use safe string functions */
16 #define _CRT_SECURE_NO_WARNINGS 1
17
18 #ifdef _WIN32
19 /* TODO verify windows includes */
20 #include <winsock.h> /* ntohl */
21 #else
22 #include <limits.h> /* PATH_MAX - not available on windows. */
23 #if defined(__FreeBSD__) || defined(__DragonFly__)
24 /* freebsd issue #188316 */
25 #include <arpa/inet.h> /* ntohl */
26 #else
27 #include <netinet/in.h> /* ntohl */
28 #endif
29 #include <stdlib.h> /* malloc */
30 #include <string.h> /* strncmp, strcpy, strcat */
31 #include <sys/stat.h> /* fchmod */
32 #endif /* ifdef _WIN32 */
33 #include <stddef.h> /* ptrdiff_t */
34 #include <stdio.h>
35
36 /* PyInstaller headers. */
37 #include "zlib.h"
38 #include "pyi_global.h"
39 #include "pyi_path.h"
40 #include "pyi_archive.h"
41 #include "pyi_utils.h"
42 #include "pyi_python.h"
43
/* Python version read from the archive cookie; assigned by pyi_arch_open(). */
int pyvers = 0;

/*
 * Byte pattern found at the start of the archive COOKIE. Locating it proves
 * that the archive data were bundled correctly.
 */
#define MAGIC "MEI\014\013\012\013\016"
48
49 /*
50 * Return pointer to next toc entry.
51 */
52 TOC *
pyi_arch_increment_toc_ptr(const ARCHIVE_STATUS * status,const TOC * ptoc)53 pyi_arch_increment_toc_ptr(const ARCHIVE_STATUS *status, const TOC* ptoc)
54 {
55 TOC *result = (TOC*)((char *)ptoc + ntohl(ptoc->structlen));
56
57 if (result < status->tocbuff) {
58 FATALERROR("Cannot read Table of Contents.\n");
59 return status->tocend;
60 }
61 return result;
62 }
63
64 /*
65 * Open archive file if needed
66 */
67 static int
pyi_arch_open_fp(ARCHIVE_STATUS * status)68 pyi_arch_open_fp(ARCHIVE_STATUS *status)
69 {
70 if (status->fp == NULL) {
71 status->fp = pyi_path_fopen(status->archivename, "rb");
72
73 if (status->fp == NULL) {
74 return -1;
75 }
76 }
77 return 0;
78 }
79
80 /*
81 * Close archive file
82 * File should close after unused to avoid locking
83 */
84 static void
pyi_arch_close_fp(ARCHIVE_STATUS * status)85 pyi_arch_close_fp(ARCHIVE_STATUS *status)
86 {
87 if (status->fp != NULL) {
88 pyi_path_fclose(status->fp);
89 status->fp = NULL;
90 }
91 }
92
93 /*
94 * Decompress data in buff, described by ptoc.
95 * Return in malloc'ed buffer (needs to be freed)
96 */
97 static unsigned char *
decompress(unsigned char * buff,TOC * ptoc)98 decompress(unsigned char * buff, TOC *ptoc)
99 {
100 const char *ver;
101 unsigned char *out;
102 z_stream zstream;
103 int rc;
104
105 ver = (zlibVersion)();
106 out = (unsigned char *)malloc(ntohl(ptoc->ulen));
107
108 if (out == NULL) {
109 OTHERERROR("Error allocating decompression buffer\n");
110 return NULL;
111 }
112
113 zstream.zalloc = NULL;
114 zstream.zfree = NULL;
115 zstream.opaque = NULL;
116 zstream.next_in = buff;
117 zstream.avail_in = ntohl(ptoc->len);
118 zstream.next_out = out;
119 zstream.avail_out = ntohl(ptoc->ulen);
120 rc = inflateInit(&zstream);
121
122 if (rc >= 0) {
123 rc = (inflate)(&zstream, Z_FINISH);
124
125 if (rc >= 0) {
126 rc = (inflateEnd)(&zstream);
127 }
128 else {
129 OTHERERROR("Error %d from inflate: %s\n", rc, zstream.msg);
130 return NULL;
131 }
132 }
133 else {
134 OTHERERROR("Error %d from inflateInit: %s\n", rc, zstream.msg);
135 return NULL;
136 }
137
138 return out;
139 }
140
141 /*
142 * Extract an archive entry.
143 * Returns pointer to the data (must be freed).
144 */
145 unsigned char *
pyi_arch_extract(ARCHIVE_STATUS * status,TOC * ptoc)146 pyi_arch_extract(ARCHIVE_STATUS *status, TOC *ptoc)
147 {
148 unsigned char *data;
149 unsigned char *tmp;
150
151 if (pyi_arch_open_fp(status) != 0) {
152 OTHERERROR("Cannot open archive file\n");
153 return NULL;
154 }
155
156 fseek(status->fp, status->pkgstart + ntohl(ptoc->pos), SEEK_SET);
157 data = (unsigned char *)malloc(ntohl(ptoc->len));
158
159 if (data == NULL) {
160 OTHERERROR("Could not allocate read buffer\n");
161 return NULL;
162 }
163
164 if (fread(data, ntohl(ptoc->len), 1, status->fp) < 1) {
165 OTHERERROR("Could not read from file\n");
166 free(data);
167 return NULL;
168 }
169
170 if (ptoc->cflag == '\1') {
171 tmp = decompress(data, ptoc);
172 free(data);
173 data = tmp;
174
175 if (data == NULL) {
176 OTHERERROR("Error decompressing %s\n", ptoc->name);
177 return NULL;
178 }
179 }
180
181 pyi_arch_close_fp(status);
182 return data;
183 }
184
185 /*
186 * Extract from the archive and copy to the filesystem.
187 * The path is relative to the directory the archive is in.
188 */
189 int
pyi_arch_extract2fs(ARCHIVE_STATUS * status,TOC * ptoc)190 pyi_arch_extract2fs(ARCHIVE_STATUS *status, TOC *ptoc)
191 {
192 FILE *out;
193 size_t result, len;
194 unsigned char *data = pyi_arch_extract(status, ptoc);
195
196 /* Create tmp dir _MEIPASSxxx. */
197 if (pyi_create_temp_path(status) == -1) {
198 return -1;
199 }
200
201 out = pyi_open_target(status->temppath, ptoc->name);
202 len = ntohl(ptoc->ulen);
203
204 if (out == NULL) {
205 FATAL_PERROR("fopen", "%s could not be extracted!\n", ptoc->name);
206 return -1;
207 }
208 else {
209 result = fwrite(data, len, 1, out);
210
211 if ((1 != result) && (len > 0)) {
212 FATAL_PERROR("fwrite", "Failed to write all bytes for %s\n", ptoc->name);
213 return -1;
214 }
215 #ifndef WIN32
216 fchmod(fileno(out), S_IRUSR | S_IWUSR | S_IXUSR);
217 #endif
218 fclose(out);
219 }
220 free(data);
221
222 return 0;
223 }
224
225 /*
226 * Look for the predefined string MAGIC in the embedded data before the given
227 * search end position. If MAGIC is found, copies the entire COOKIE struct into
228 * status->cookie, sets status->pkgstart to the location of the archive and returns 0.
229 * Returns -1 on failure.
230 *
231 * PyInstaller sets this cookie to a constant value. Bootloader
232 * compares it with the expected value. If there is match then
233 * bootloader knows where the data was embedded correctly.
234 *
235 * The search space uses the given sizes because on Windows and OS X, the code signing
236 * will add padding between the end of the COOKIE and the beginning of the signature
237 * to align the signature to a quadword or a page boundary respectively. On Linux,
238 * we use objtool to insert the archive into the bootloader, and objtool will
239 * move the ELF section headers so they follow the cookie, so we need to search backward
240 * past the section headers to find the cookie.
241 */
242 #if defined(WIN32)
243 #define SEARCH_SIZE (8 + sizeof(COOKIE))
244 #else
245 #define SEARCH_SIZE (4096 + sizeof(COOKIE))
246 #endif
247
248 static int
pyi_arch_find_cookie(ARCHIVE_STATUS * status,int search_end)249 pyi_arch_find_cookie(ARCHIVE_STATUS *status, int search_end)
250 {
251 int search_start = search_end - SEARCH_SIZE;
252 char buf[SEARCH_SIZE];
253 char * search_ptr = buf + SEARCH_SIZE - sizeof(COOKIE);
254
255 if (fseek(status->fp, search_start, SEEK_SET)) {
256 return -1;
257 }
258
259 /* Read the entire search space */
260 if (fread(buf, SEARCH_SIZE, 1, status->fp) < 1) {
261 return -1;
262 }
263
264 /* Search for MAGIC within search space */
265
266 while(search_ptr >= buf) {
267 if(0 == strncmp(MAGIC, search_ptr, strlen(MAGIC))) {
268 /* MAGIC found - Copy COOKIE to status->cookie */
269 memcpy(&status->cookie, search_ptr, sizeof(COOKIE));
270
271 /* From the cookie, calculate the archive start */
272 status->pkgstart = search_start + sizeof(COOKIE) + (search_ptr - buf) - ntohl(status->cookie.len);
273
274 return 0;
275 }
276 search_ptr--;
277 }
278
279 return -1;
280 }
281
282 static int
findDigitalSignature(ARCHIVE_STATUS * const status)283 findDigitalSignature(ARCHIVE_STATUS * const status)
284 {
285 #ifdef _WIN32
286 /* There might be a digital signature attached. Let's see. */
287 char buf[2];
288 int offset = 0, signature_offset = 0;
289 fseek(status->fp, 0, SEEK_SET);
290 fread(buf, 1, 2, status->fp);
291
292 if (!(buf[0] == 'M' && buf[1] == 'Z')) {
293 return -1;
294 }
295 /* Skip MSDOS header */
296 fseek(status->fp, 60, SEEK_SET);
297 /* Read offset to PE header */
298 fread(&offset, 4, 1, status->fp);
299 fseek(status->fp, offset + 24, SEEK_SET);
300 fread(buf, 2, 1, status->fp);
301
302 if (buf[0] == 0x0b && buf[1] == 0x01) {
303 /* 32 bit binary */
304 signature_offset = 152;
305 }
306 else if (buf[0] == 0x0b && buf[1] == 0x02) {
307 /* 64 bit binary */
308 signature_offset = 168;
309 }
310 else {
311 /* Invalid magic value */
312 VS("LOADER: Could not find a valid magic value (was %x %x).\n",
313 (unsigned int) buf[0], (unsigned int) buf[1]);
314 return -1;
315 }
316
317 /* Jump to the fields that contain digital signature info */
318 fseek(status->fp, offset + signature_offset, SEEK_SET);
319 fread(&offset, 4, 1, status->fp);
320
321 if (offset == 0) {
322 return -1;
323 }
324 VS("LOADER: %s contains a digital signature\n", status->archivename);
325 return offset;
326 #elif defined(__APPLE__)
327 /* We inspect the Mach-O header to find a code signature
328 * https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/MachORuntime/
329 * 1) Determine the length of the header
330 * 2) Read the Mach-O Header to determine how many commands there are
331 * 3) Read through the commands and look for a code signature section (command #29)
332 * 4) If we find a one, return where it starts */
333
334 uint32_t magic_value;
335 uint32_t header_size;
336
337 uint32_t load_size;
338 uint32_t cmd;
339 uint32_t cmd_size;
340 uint32_t offset = -1;
341
342 /* The first 4 bytes determine the header length */
343 fseek(status->fp, 0, SEEK_SET);
344 fread(&magic_value, sizeof(uint32_t), 1, status->fp);
345
346 if (magic_value == 0xfeedface || magic_value == 0xcefaedfe) {
347 /* 32-bit, so the header size is 28 bytes. */
348 header_size = 28;
349 }
350 else {
351 /* 64-bit, so the header size is 32 bytes. */
352 header_size = 32;
353 }
354
355 /* Determine the total size of all load commands */
356 fseek(status->fp, 20, SEEK_SET);
357 fread(&load_size, sizeof(uint32_t), 1, status->fp);
358
359 fseek(status->fp, header_size, SEEK_SET);
360
361 while (ftell(status->fp) < (header_size + load_size)) {
362 fread(&cmd, sizeof(uint32_t), 1, status->fp);
363 fread(&cmd_size, sizeof(uint32_t), 1, status->fp);
364
365 if (cmd == 29) {
366 /* Code signatures are command 29.
367 * Our archive ends right before the signature */
368 fread(&offset, sizeof(uint32_t), 1, status->fp);
369 VS("LOADER: %s contains a digital signature\n", status->archivename);
370 break;
371 }
372 fseek(status->fp, cmd_size - 8, SEEK_CUR);
373 }
374 return offset;
375 #else /* ifdef _WIN32 */
376 return -1;
377 #endif /* ifdef _WIN32 */
378 }
379
380 /*
381 * Open the archive.
382 * Sets f_archiveFile, f_pkgstart, f_tocbuff and f_cookie.
383 */
384 int
pyi_arch_open(ARCHIVE_STATUS * status)385 pyi_arch_open(ARCHIVE_STATUS *status)
386 {
387 int search_end = 0;
388 VS("LOADER: archivename is %s\n", status->archivename);
389
390 /* Physically open the file */
391 if (pyi_arch_open_fp(status) != 0) {
392 VS("LOADER: Cannot open archive: %s\n", status->archivename);
393 return -1;
394 }
395
396 /* Find out where to stop searching for the cookie. First try to find
397 * a digital signature added by a code signing tool.
398 */
399 #if defined(WIN32) || defined(__APPLE__)
400 search_end = findDigitalSignature(status);
401 #endif
402
403 /* Signature not found or not applicable for this platform. Stop searching
404 * at end of file.
405 */
406 if (search_end < 1) {
407 fseek(status->fp, 0, SEEK_END);
408 search_end = ftell(status->fp);
409 }
410
411 /* Load status->cookie */
412 if (-1 == pyi_arch_find_cookie(status, search_end)) {
413 VS("Loader: Cannot find cookie");
414 return -1;
415 }
416
417 /* Set the flag that Python library was not loaded yet. */
418 status->is_pylib_loaded = false;
419
420 /* Set the the Python version used. */
421 pyvers = pyi_arch_get_pyversion(status);
422
423 /* Read in in the table of contents */
424 fseek(status->fp, status->pkgstart + ntohl(status->cookie.TOC), SEEK_SET);
425 status->tocbuff = (TOC *) malloc(ntohl(status->cookie.TOClen));
426
427 if (status->tocbuff == NULL) {
428 FATAL_PERROR("malloc", "Could not allocate buffer for TOC.");
429 return -1;
430 }
431
432 if (fread(status->tocbuff, ntohl(status->cookie.TOClen), 1, status->fp) < 1) {
433 FATAL_PERROR("fread", "Could not read from file.");
434 return -1;
435 }
436 status->tocend = (TOC *) (((char *)status->tocbuff) + ntohl(status->cookie.TOClen));
437
438 /* Check input file is still ok (should be). */
439 if (ferror(status->fp)) {
440 FATALERROR("Error on file\n.");
441 return -1;
442 }
443
444 /* Close file handler
445 * if file not close here it will be close in pyi_arch_status_free_memory */
446 pyi_arch_close_fp(status);
447 return 0;
448 }
449
450 /*
451 * Set up paths required by rest of this module.
452 * Sets f_archivename, f_homepath, f_mainpath
453 */
454 int
pyi_arch_set_paths(ARCHIVE_STATUS * status,char const * archivePath,char const * archiveName)455 pyi_arch_set_paths(ARCHIVE_STATUS *status, char const * archivePath,
456 char const * archiveName)
457 {
458 size_t pathlen, namelen;
459
460 pathlen = strnlen(archivePath, PATH_MAX);
461 namelen = strnlen(archiveName, PATH_MAX);
462
463 if (pathlen+namelen+1 > PATH_MAX) {
464 return -1;
465 }
466
467 /* Get the archive Path */
468 strcpy(status->archivename, archivePath);
469 strcat(status->archivename, archiveName);
470
471 /* Set homepath to where the archive is */
472 strcpy(status->homepath, archivePath);
473
474 /*
475 * Initial value of mainpath is homepath. It might be overriden
476 * by temppath if it is available.
477 */
478 status->has_temp_directory = false;
479 strcpy(status->mainpath, status->homepath);
480
481 return 0;
482 }
483
484 /* Setup the archive with python modules. (this always needs to be done) */
485 int
pyi_arch_setup(ARCHIVE_STATUS * status,char const * archivePath,char const * archiveName)486 pyi_arch_setup(ARCHIVE_STATUS *status, char const * archivePath, char const * archiveName)
487 {
488 /* Set up paths */
489 if (pyi_arch_set_paths(status, archivePath, archiveName)) {
490 return -1;
491 }
492
493 /* Open the archive */
494 if (pyi_arch_open(status)) {
495 /* If this is not an archive, we MUST close the file, */
496 /* otherwise the open file-handle will be reused when */
497 /* testing the next file. */
498 pyi_arch_close_fp(status);
499 return -1;
500 }
501 ;
502 return 0;
503 }
504
505 /*
506 * external API for iterating TOCs
507 */
508 TOC *
getFirstTocEntry(ARCHIVE_STATUS * status)509 getFirstTocEntry(ARCHIVE_STATUS *status)
510 {
511 return status->tocbuff;
512 }
513 TOC *
getNextTocEntry(ARCHIVE_STATUS * status,TOC * entry)514 getNextTocEntry(ARCHIVE_STATUS *status, TOC *entry)
515 {
516 TOC *rslt = (TOC*)((char *)entry + ntohl(entry->structlen));
517
518 if (rslt >= status->tocend) {
519 return NULL;
520 }
521 return rslt;
522 }
523
524 /*
525 * Helpers for embedders.
526 */
527 int
pyi_arch_get_pyversion(ARCHIVE_STATUS * status)528 pyi_arch_get_pyversion(ARCHIVE_STATUS *status)
529 {
530 return ntohl(status->cookie.pyvers);
531 }
532
533 /*
534 * Free memory allocated for archive status.
535 */
536 void
pyi_arch_status_free_memory(ARCHIVE_STATUS * archive_status)537 pyi_arch_status_free_memory(ARCHIVE_STATUS *archive_status)
538 {
539 if (archive_status != NULL) {
540 VS("LOADER: Freeing archive status for %s\n", archive_status->archivename);
541
542 /* Free the TOC memory from the archive status first. */
543 if (archive_status->tocbuff != NULL) {
544 free(archive_status->tocbuff);
545 }
546 /* Close file handler */
547 pyi_arch_close_fp(archive_status);
548 free(archive_status);
549 }
550 }
551
552 /*
553 * Returns the value of the pyi bootloader option given by optname. Returns
554 * NULL if the option is not present. Returns an empty string if the option is present,
555 * but has no associated value.
556 *
557 * The string returned is owned by the ARCHIVE_STATUS; the caller is NOT responsible
558 * for freeing it.
559 */
560 char *
pyi_arch_get_option(const ARCHIVE_STATUS * status,char * optname)561 pyi_arch_get_option(const ARCHIVE_STATUS * status, char * optname)
562 {
563 /* TODO: option-cache? */
564 int optlen;
565 TOC *ptoc = status->tocbuff;
566
567 optlen = strlen(optname);
568
569 for (; ptoc < status->tocend; ptoc = pyi_arch_increment_toc_ptr(status, ptoc)) {
570 if (ptoc->typcd == ARCHIVE_ITEM_RUNTIME_OPTION) {
571 if (0 == strncmp(ptoc->name, optname, optlen)) {
572 if (0 != ptoc->name[optlen]) {
573 /* Space separates option name from option value, so add 1. */
574 return ptoc->name + optlen + 1;
575 }
576 else {
577 /* No option value, just return the empty string. */
578 return ptoc->name + optlen;
579 }
580
581 }
582 }
583 }
584 return NULL;
585 }
586