1 /* ************************************************************************
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17 #ifdef _MSC_VER
18 #pragma warning(disable : 4996)
19 #endif
20
21 #include "fft_binary_lookup.h"
22
23 #include <iostream>
24 #include <fstream>
25 #include <cassert>
26
27 #include <stdio.h>
28 #include <errno.h>
29 #include <fcntl.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <stdlib.h>
33
34 #ifdef _WIN32
35 #include <windows.h>
36 #include <direct.h> // for _mkdir
37 #else
38 #include <unistd.h>
39 #endif
40
41 extern "C"
42 {
43 #include "md5sum.h"
44 }
45
46 // size for clGetDeviceInfo queries
47 #define SIZE 256
48
49 #define ENABLE_SOURCE_DUMP 0
50
51
52 #define CAPS_DEBUG 0
53
54 #include <string.h>
55
// Returns the path separator for the platform this file is compiled for:
// a backslash when _WIN32 is defined, a forward slash otherwise.
// The returned pointer refers to a string literal and must not be modified.
static char * sep()
{
#ifdef _WIN32
    const char * const separator = "\\";
#else
    const char * const separator = "/";
#endif
    return (char*)separator;
}
64
// Root directory of the on-disk cache (set by clfftInitBinaryCache()); empty until then.
static std::string cache_path;
// True once a cache directory has been configured; cleared again when the cache
// directories cannot be created.
static bool cache_enabled = false;
// True when the CLFFT_REQUEST_LIB_NOMEMALLOC environment variable was found.
static bool request_nomemalloc = false;
68
clfftInitRequestLibNoMemAlloc()69 void clfftInitRequestLibNoMemAlloc()
70 {
71 const char * val = getenv("CLFFT_REQUEST_LIB_NOMEMALLOC");
72
73 if (val)
74 request_nomemalloc = true;
75 }
76
clfftGetRequestLibNoMemAlloc()77 bool clfftGetRequestLibNoMemAlloc()
78 {
79 return request_nomemalloc;
80 }
81
clfftInitBinaryCache()82 void clfftInitBinaryCache()
83 {
84 const char * path = getenv("CLFFT_CACHE_PATH");
85 if (path)
86 {
87 cache_path = std::string(path) + sep();
88 cache_enabled = true;
89 }
90 else
91 {
92 cache_path = "";
93 }
94 }
95
CacheEntry(const std::string & filename)96 FFTBinaryLookup::CacheEntry::CacheEntry(const std::string & filename)
97 : m_filename(filename), m_successful_creation(false)
98 {
99
100 }
101
close()102 void FFTBinaryLookup::CacheEntry::close()
103 {
104 #ifdef _WIN32
105 CloseHandle(this->m_handle);
106 #else
107 ::close(*(int*)this->m_handle);
108 //delete (int*)this->m_handle;
109 #endif
110 }
111
successful_creation()112 bool FFTBinaryLookup::CacheEntry::successful_creation()
113 {
114 return this->m_successful_creation;
115 }
116
exclusive_create()117 bool FFTBinaryLookup::CacheEntry::exclusive_create()
118 {
119 #ifdef _WIN32
120 std::wstring tmp;
121 tmp.assign(this->m_filename.begin(), this->m_filename.end());
122
123 HANDLE handle = CreateFile(tmp.c_str(),
124 GENERIC_WRITE,
125 0, // no share with other process
126 NULL,
127 CREATE_NEW,
128 FILE_ATTRIBUTE_NORMAL,
129 NULL);
130
131 this->m_handle = handle;
132 this->m_successful_creation = (handle != INVALID_HANDLE_VALUE);
133 return this->m_successful_creation;
134 #else
135 int * fd = new int;
136 *fd = open (this->m_filename.c_str(),
137 O_CREAT | O_EXCL,
138 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
139 this->m_handle = fd;
140 this->m_successful_creation = (*fd != -1);
141 return *fd >= 0;
142 #endif
143 }
144
FFTBinaryLookup(const clfftGenerators gen,const clfftPlanHandle plHandle,cl_context ctxt,cl_device_id device)145 FFTBinaryLookup::FFTBinaryLookup(const clfftGenerators gen, const clfftPlanHandle plHandle, cl_context ctxt, cl_device_id device)
146 : m_context(ctxt), m_device(device), m_program(NULL), m_binary(0), m_signature(0), m_cache_enabled(cache_enabled)
147 {
148 // initialize the entry name
149 this->m_cache_entry_name = getKernelName(gen, plHandle, false);
150
151 if (this->m_cache_enabled)
152 {
153 // retrieve device informations to compute the path of the cache
154 cl_int err = this->retrieveDeviceAndDriverInfo();
155
156 if (err != CL_SUCCESS)
157 {
158 cache_enabled = false;
159 this->m_cache_enabled = false;
160 }
161 }
162 }
163
~FFTBinaryLookup()164 FFTBinaryLookup::~FFTBinaryLookup()
165 {
166 if (this->m_binary != NULL)
167 {
168 delete[] this->m_binary;
169 this->m_binary = 0;
170 }
171
172 if (this->m_signature != NULL)
173 {
174 delete[] this->m_signature;
175 this->m_signature = 0;
176 }
177 }
178
Variant()179 FFTBinaryLookup::Variant::Variant()
180 : m_kind((VariantKind)0), m_size(0), m_data(0)
181 {
182 }
183
Variant(VariantKind kind,char * data,size_t size)184 FFTBinaryLookup::Variant::Variant(VariantKind kind, char * data, size_t size)
185 : m_kind(kind), m_size(size)
186 {
187 this->m_data = new char[this->m_size];
188 memcpy(this->m_data, data, size);
189 }
190
Variant(const Variant & obj)191 FFTBinaryLookup::Variant::Variant(const Variant &obj)
192 : m_kind(obj.m_kind), m_size(obj.m_size)
193 {
194 this->m_data = new char[this->m_size];
195 memcpy(this->m_data, obj.m_data, m_size);
196 }
197
operator =(const Variant & obj)198 FFTBinaryLookup::Variant &FFTBinaryLookup::Variant::operator=(const Variant &obj)
199 {
200 if (this->m_data != NULL)
201 {
202 delete[] this->m_data;
203 this->m_data = 0;
204 }
205
206 m_kind = obj.m_kind;
207 m_size = obj.m_size;
208
209 this->m_data = new char[this->m_size];
210 memcpy(this->m_data, obj.m_data, m_size);
211
212 return *this;
213 }
214
~Variant()215 FFTBinaryLookup::Variant::~Variant()
216 {
217 if (this->m_data != NULL)
218 {
219 delete[] this->m_data;
220 this->m_data = 0;
221 }
222 }
223
variantInt(int num)224 void FFTBinaryLookup::variantInt(int num)
225 {
226 m_variants.push_back(Variant(INT, (char*)&num, sizeof(num)));
227 }
228
variantDouble(double num)229 void FFTBinaryLookup::variantDouble(double num)
230 {
231 m_variants.push_back(Variant(DOUBLE, (char*)&num, sizeof(num)));
232 }
233
variantCompileOptions(const std::string & opts)234 void FFTBinaryLookup::variantCompileOptions(const std::string & opts)
235 {
236 m_variants.push_back(Variant(STRING, (char*)opts.c_str(), opts.size()));
237 }
238
variantRaw(const void * data,size_t bytes)239 void FFTBinaryLookup::variantRaw(const void * data, size_t bytes)
240 {
241 m_variants.push_back(Variant(DATA, (char*)data, bytes));
242 }
243
// Byte order tag derived from the 4-byte magic key of a cache file.
enum BinaryRepresentation
{
    LSB = 0,
    MSB = 1,
    UNKNOWN = 2
};

// Classifies a 4-byte magic key: "CLB\0" is LSB, "BLC\0" is MSB,
// anything else is UNKNOWN. Bytes are examined left to right and the
// comparison stops at the first mismatch.
static enum BinaryRepresentation getStorageMode(char * data)
{
    switch (data[0])
    {
    case 'C':
        if (data[1] == 'L' && data[2] == 'B' && data[3] == '\0')
        {
            return LSB;
        }
        break;

    case 'B':
        if (data[1] == 'L' && data[2] == 'C' && data[3] == '\0')
        {
            return MSB;
        }
        break;

    default:
        break;
    }

    return UNKNOWN;
}
267
finalizeVariant()268 void FFTBinaryLookup::finalizeVariant()
269 {
270 // serialize variants
271 size_t whole_variant_size_in_bytes = 0;
272
273 // store 1 byte for the variant kind
274 whole_variant_size_in_bytes += this->m_variants.size() * sizeof(int); // for the variant kind
275 whole_variant_size_in_bytes += this->m_variants.size() * sizeof(size_t); // for the variant size
276
277 // add every variant sizes
278 for(size_t i=0 ; i<this->m_variants.size() ; ++i)
279 {
280 const Variant & v = this->m_variants[i];
281
282 // compute the whole size of the signature
283 whole_variant_size_in_bytes += v.m_size;
284 }
285
286 this->m_header.signature_size = whole_variant_size_in_bytes;
287
288 if (this->m_signature != NULL)
289 {
290 delete[] this->m_signature;
291 this->m_signature = 0;
292 }
293
294 this->m_signature = new char[whole_variant_size_in_bytes];
295 char * current_address = this->m_signature;
296 for(size_t i=0 ; i<this->m_variants.size() ; ++i)
297 {
298 Variant v = this->m_variants[i];
299
300 // write the variant kind
301 memcpy(current_address, &v.m_kind, sizeof(int));
302 current_address += sizeof(v.m_kind);
303
304 // write the variant size
305 memcpy(current_address, &v.m_size, sizeof(v.m_size));
306 current_address += sizeof(v.m_size);
307
308 // write the variant itself
309 memcpy(current_address, v.m_data, v.m_size);
310 current_address += v.m_size;
311 }
312
313 // Update the cache entry name if there are variants...
314 if (whole_variant_size_in_bytes != 0)
315 {
316 char md5_sum[33];
317 md5sum(this->m_signature, (unsigned long)this->m_header.signature_size, md5_sum);
318 this->m_cache_entry_name = md5_sum;
319 }
320 else
321 {
322 this->m_cache_entry_name += ".db";
323 }
324 }
325
loadHeader(std::ifstream & file,size_t length)326 bool FFTBinaryLookup::loadHeader(std::ifstream &file, size_t length)
327 {
328 file.read ((char*)&this->m_header, sizeof(Header));
329
330 // FIXME: Consider LSB Vs MSB number representation
331 assert(getStorageMode(this->m_header.magic_key) == LSB);
332
333 if (this->m_header.whole_file_size != (int)length)
334 {
335 // the file has not been correctly initialized (yet)
336 return false;
337 }
338
339 return true;
340 }
341
loadBinaryAndSignature(std::ifstream & file)342 bool FFTBinaryLookup::loadBinaryAndSignature(std::ifstream &file)
343 {
344 {
345 this->m_binary = new unsigned char [this->m_header.binary_size];
346 const std::istream& res = file.read((char*)this->m_binary, this->m_header.binary_size);
347 if (!res.good())
348 return false;
349 }
350
351 {
352 if (this->m_signature != NULL)
353 {
354 delete[] this->m_signature;
355 this->m_signature = 0;
356 }
357
358 this->m_signature = new char [this->m_header.signature_size];
359 const std::istream& res = file.read((char*)this->m_signature, this->m_header.signature_size);
360
361 if (!res.good())
362 return false;
363
364 this->m_variants.clear();
365
366 char * current = this->m_signature;
367 for (size_t i=0 ; i<this->m_header.signature_size ; ++i)
368 {
369 Variant v;
370 v.m_kind = *(VariantKind*) current;
371 i += sizeof(int);
372 current += sizeof(int);
373
374 v.m_size = *(size_t*) current;
375 i += sizeof(size_t);
376 current += sizeof(size_t);
377
378 v.m_data = new char[v.m_size];
379 memcpy(v.m_data, current, v.m_size);
380 i += v.m_size;
381 current += v.m_size;
382
383 this->m_variants.push_back(v);
384 }
385 }
386
387 return true;
388 }
389
tryLoadCacheFile()390 bool FFTBinaryLookup::tryLoadCacheFile()
391 {
392 // may create empty file or may wait until file is ready
393 const std::string & filename = this->m_path + this->m_cache_entry_name;
394 std::ifstream file (filename.c_str(), std::ios_base::binary);
395
396 if (file.is_open())
397 {
398 file.seekg (0, file.end);
399 size_t length = file.tellg();
400 file.seekg (0, file.beg);
401
402 if (length == 0)
403 {
404 // the file is corrupted, so return false
405 return false;
406 }
407
408 bool st;
409 st = loadHeader(file, length);
410
411 if (! st)
412 return false;
413
414 st = loadBinaryAndSignature(file);
415
416 if (! st)
417 return false;
418
419 file.close();
420 return true;
421 }
422 else
423 {
424 return false;
425 }
426 }
427
found()428 bool FFTBinaryLookup::found()
429 {
430 // if we could not create the directory, it is useless to
431 if (! this->m_cache_enabled)
432 {
433 return false; // not found
434 }
435
436 this->finalizeVariant(); // serialize variant and cumpute checksum on it
437 // also compute the tree to search from the cache entry (this->m_cache_entry_name, cache path ??)
438
439 if (tryLoadCacheFile())
440 {
441 cl_int err = buildFromBinary(this->m_binary,
442 this->m_header.binary_size);
443
444 // return false if the buildFromBinary failed, true else
445 return err==CL_SUCCESS;
446 }
447
448 return false;
449 }
450
getSingleBinaryFromProgram(cl_program program,std::vector<unsigned char * > & binary)451 static cl_int getSingleBinaryFromProgram(cl_program program,
452 std::vector<unsigned char*> & binary)
453 {
454 // 3 - Determine the size of each program binary
455 size_t size;
456 cl_int err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
457 sizeof(size_t),
458 &size, NULL);
459 if (err != CL_SUCCESS)
460 {
461 std::cerr << "Error querying for program binary sizes" << std::endl;
462 return err;
463 }
464
465 binary.resize(size);
466 binary[0] = new unsigned char[size];
467
468 unsigned char * binary_address[1] = { binary[0] };
469
470 // 4 - Get all of the program binaries
471 err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 1 * sizeof(unsigned char*),
472 binary_address, NULL);
473
474
475 if (err != CL_SUCCESS)
476 {
477 delete[] binary[0];
478 #if CAPS_DEBUG
479 std::cerr << "Error querying for program binaries" << std::endl;
480 #endif
481 return err;
482 }
483
484 return CL_SUCCESS;
485 }
486
writeCacheFile(std::vector<unsigned char * > & data)487 cl_int FFTBinaryLookup::writeCacheFile(std::vector<unsigned char*> &data)
488 {
489 if (! this->m_cache_enabled)
490 {
491 return 0;
492 }
493
494 // exclusive open to ensure that only one thread will write the file
495 const std::string & filename = this->m_path + this->m_cache_entry_name;
496
497 CacheEntry cache_file(filename);
498 bool created = cache_file.exclusive_create();
499
500 // try to exclusively create the cache file on the disk
501 if (created)
502 {
503 // if it was created by the current thread, this one will write into cache file
504 cache_file.close();
505
506 const std::string & filename = this->m_path + this->m_cache_entry_name;
507 std::ofstream file (filename.c_str(), std::ios_base::binary);
508
509 file.write((char*)&this->m_header, sizeof(m_header));
510 file.write((char*)data[0], this->m_header.binary_size);
511 file.write((char*)this->m_signature, this->m_header.signature_size);
512 file.close();
513
514 #if ENABLE_SOURCE_DUMP
515 const std::string & srcFilename = this->m_path + this->m_cache_entry_name + ".cl";
516 std::ofstream srcFile (srcFilename.c_str());
517 srcFile << this->m_source;
518
519 srcFile.close();
520 #endif
521
522 return CL_SUCCESS;
523 }
524
525 // other thread do not write the cache file
526 return 1;
527 }
528
populateCache()529 cl_int FFTBinaryLookup::populateCache()
530 {
531 // FIXME: support MSB
532 this->m_header.magic_key[0] = 'C';
533 this->m_header.magic_key[1] = 'L';
534 this->m_header.magic_key[2] = 'B';
535 this->m_header.magic_key[3] = '\0';
536
537 std::vector<unsigned char*> data;
538 cl_int err = getSingleBinaryFromProgram(this->m_program, data);
539
540 if (err != CL_SUCCESS)
541 {
542 return err;
543 }
544
545 this->m_header.header_size = sizeof(Header);
546 this->m_header.binary_size = data.size();
547 this->m_header.whole_file_size = this->m_header.header_size + this->m_header.binary_size + this->m_header.signature_size;
548
549 writeCacheFile(data); // ignore return code, because it does nothing if
550 // the file could not be written (i.e the current
551 // thread did not create the file
552 delete [] data[0];
553
554 return CL_SUCCESS;
555 }
556
buildFromSource(const char * source)557 cl_int FFTBinaryLookup::buildFromSource(const char * source)
558 {
559 cl_int err;
560 this->m_program = FFTBinaryLookup::buildProgramFromSource(source,
561 this->m_context,
562 this->m_device,
563 err);
564
565 if (err != CL_SUCCESS)
566 {
567 return err;
568 }
569
570 // write to the cache
571 this->populateCache();
572
573 return CL_SUCCESS;
574 }
575
buildFromLoadedBinary(const void * data,size_t len)576 cl_int FFTBinaryLookup::buildFromLoadedBinary(const void * data,
577 size_t len)
578 {
579 cl_int err;
580 this->m_program = FFTBinaryLookup::buildProgramFromBinary((char*) data,
581 len,
582 this->m_context,
583 this->m_device,
584 err);
585
586 return err;
587 }
588
buildFromBinary(const void * data,size_t len)589 cl_int FFTBinaryLookup::buildFromBinary(const void * data,
590 size_t len)
591 {
592 cl_int err = buildFromLoadedBinary(data, len);
593 if (err != CL_SUCCESS)
594 return err;
595
596 // write to the cache
597 this->populateCache();
598
599 return CL_SUCCESS;
600 }
601
buildProgramFromSource(const char * source,cl_context context,cl_device_id device,cl_int & err,const char * options)602 cl_program FFTBinaryLookup::buildProgramFromSource(const char * source,
603 cl_context context,
604 cl_device_id device,
605 cl_int & err,
606 const char * options)
607 {
608 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &err);
609
610 if (err != CL_SUCCESS)
611 return NULL;
612
613 err = clBuildProgram(program,
614 1, /* FIXME: 1 device */
615 &device,
616 options,
617 NULL,
618 NULL);
619
620 if (err != CL_SUCCESS)
621 return NULL;
622
623 return program;
624 }
625
626
627
buildProgramFromBinary(const char * data,size_t data_size,cl_context context,cl_device_id device,cl_int & err,const char * options)628 cl_program FFTBinaryLookup::buildProgramFromBinary(const char * data,
629 size_t data_size,
630 cl_context context,
631 cl_device_id device,
632 cl_int & err,
633 const char * options)
634 {
635 cl_program program = clCreateProgramWithBinary(context,
636 1, // num_device
637 &device, // device_list
638 &data_size, // lengths
639 (const unsigned char **)&data,
640 NULL,
641 &err);
642 if (err != CL_SUCCESS)
643 {
644 // FIXME: emit an internal message for OPENCL errors
645 return NULL;
646 }
647
648 err = clBuildProgram(program,
649 1, /* FIXME: 1 device */
650 &device,
651 options,
652 NULL,
653 NULL);
654
655 if (err != CL_SUCCESS)
656 {
657 return NULL;
658 }
659
660 return program;
661 }
662
getProgram()663 cl_program FFTBinaryLookup::getProgram()
664 {
665 return this->m_program;
666 }
667
setProgram(cl_program program,const char * source)668 void FFTBinaryLookup::setProgram(cl_program program, const char * source)
669 {
670 this->m_program = program;
671 this->m_source = source;
672 }
673
674
// Creates the directory `path` with the platform's mkdir call.
// On POSIX the directory is created with owner-only permissions (S_IRWXU).
// Returns 0 on success, non-zero on failure (errno describes the failure).
static int make_directory(const std::string &path)
{
#ifdef _WIN32
    return _mkdir (path.c_str());
#else
    return mkdir (path.c_str(), S_IRWXU);
#endif
}

// Creates the directory `path`, treating "already exists" as success.
// Throws a std::string describing the failure (path and system error text)
// for any other error.
static void do_mkdir(const std::string &path)
{
    if (make_directory (path) == 0)
    {
        return;
    }

    // Capture errno before anything else can overwrite it.
    const int failure = errno;

    if (failure == EEXIST)
    {
        return;
    }

    const std::string message = "Cannot create directory '" + path + "': " + strerror(failure);
    throw message;
}
697
retrieveDeviceAndDriverInfo()698 cl_int FFTBinaryLookup::retrieveDeviceAndDriverInfo()
699 {
700 char m_device_vendor[SIZE];
701 char m_device_name[SIZE];
702 char m_driver_version[SIZE];
703
704 cl_int err = clGetDeviceInfo(this->m_device, CL_DEVICE_VENDOR, sizeof(m_device_vendor),
705 &m_device_vendor, NULL);
706 if (err != CL_SUCCESS)
707 {
708 return err;
709 }
710
711 err = clGetDeviceInfo(this->m_device, CL_DEVICE_NAME, sizeof(m_device_name),
712 &m_device_name, NULL);
713 if (err != CL_SUCCESS)
714 {
715 return err;
716 }
717
718 err = clGetDeviceInfo(this->m_device, CL_DRIVER_VERSION, sizeof(m_driver_version),
719 &m_driver_version, NULL);
720 if (err != CL_SUCCESS)
721 {
722 return err;
723 }
724
725 #if CAPS_DEBUG
726 fprintf(stderr, "device vendor = %s\n", this->m_device_vendor);
727 fprintf(stderr, "device name = %s\n", this->m_device_name);
728 fprintf(stderr, "driver version = %s\n", this->m_driver_version);
729 #endif
730
731 try
732 {
733 const std::string & root = (std::string(cache_path) + m_device_vendor + sep());
734 do_mkdir(root.c_str());
735
736 const std::string & root2 = (root + m_device_name + sep());
737 do_mkdir(root2.c_str());
738
739 const std::string & root3 = (root2 + m_driver_version + sep());
740 do_mkdir(root3.c_str());
741
742 const std::string & root4 = (root3 + this->m_cache_entry_name + sep());
743 do_mkdir(root4.c_str());
744
745 this->m_path = root4;
746
747 return CL_SUCCESS;
748 }
749 catch (std::string & e)
750 {
751 fprintf(stderr, "%s\n", e.c_str());
752 cache_enabled = false;
753 this->m_cache_enabled = false;
754
755 return CL_INVALID_VALUE;
756 }
757 }
758