1 // realsr implemented with ncnn library
2 
3 #include <stdio.h>
4 #include <algorithm>
5 #include <queue>
6 #include <vector>
7 #include <clocale>
8 
9 #if _WIN32
10 // image decoder and encoder with wic
11 #include "wic_image.h"
12 #else // _WIN32
13 // image decoder and encoder with stb
14 #define STB_IMAGE_IMPLEMENTATION
15 #define STBI_NO_PSD
16 #define STBI_NO_TGA
17 #define STBI_NO_GIF
18 #define STBI_NO_HDR
19 #define STBI_NO_PIC
20 #define STBI_NO_STDIO
21 #include "stb_image.h"
22 #define STB_IMAGE_WRITE_IMPLEMENTATION
23 #include "stb_image_write.h"
24 #endif // _WIN32
25 #include "webp_image.h"
26 
27 #if _WIN32
28 #include <wchar.h>
// Parser state shared with the caller, named like the POSIX getopt() globals.
static wchar_t* optarg = NULL; // argument of the option returned last, or NULL
static int optind = 1;         // index of the next argv element to examine

// Minimal wide-character stand-in for POSIX getopt().
//
// Only the first letter after '-' in an argv element is looked at, and an
// option argument must be the following argv element.
//
// argc, argv - command line as passed to wmain()
// optstring  - accepted option letters; a letter followed by ':' takes an argument
//
// Returns the option letter, L'?' for an unknown option or a missing argument,
// and (wchar_t)-1 once the next argv element is not an option.
static wchar_t getopt(int argc, wchar_t* const argv[], const wchar_t* optstring)
{
    if (optind >= argc || argv[optind][0] != L'-')
        return -1;

    wchar_t opt = argv[optind][1];

    // a lone "-" is an operand, not an option; without this check wcschr()
    // would match the terminator of optstring and p[1] would read past it
    if (opt == L'\0')
        return -1;

    // ':' is the argument marker inside optstring, never an option letter
    if (opt == L':')
        return L'?';

    const wchar_t* p = wcschr(optstring, opt);
    if (p == NULL)
        return L'?';

    optarg = NULL;

    if (p[1] == L':')
    {
        // the option takes an argument, it is the next argv element
        optind++;
        if (optind >= argc)
            return L'?';

        optarg = argv[optind];
    }

    optind++;

    return opt;
}
56 
parse_optarg_int_array(const wchar_t * optarg)57 static std::vector<int> parse_optarg_int_array(const wchar_t* optarg)
58 {
59     std::vector<int> array;
60     array.push_back(_wtoi(optarg));
61 
62     const wchar_t* p = wcschr(optarg, L',');
63     while (p)
64     {
65         p++;
66         array.push_back(_wtoi(p));
67         p = wcschr(p, L',');
68     }
69 
70     return array;
71 }
72 #else // _WIN32
73 #include <unistd.h> // getopt()
74 
// Split a comma separated option value such as "0,1,2" into integers.
// Every field is converted with atoi(); the result always holds at least
// one element because the text before the first comma is always converted.
static std::vector<int> parse_optarg_int_array(const char* optarg)
{
    std::vector<int> values;

    const char* field = optarg;
    for (;;)
    {
        values.push_back(atoi(field));

        const char* comma = strchr(field, ',');
        if (!comma)
            break;

        // continue right behind the separator
        field = comma + 1;
    }

    return values;
}
90 #endif // _WIN32
91 
92 // ncnn
93 #include "cpu.h"
94 #include "gpu.h"
95 #include "platform.h"
96 
97 #include "realsr.h"
98 
99 #include "filesystem_utils.h"
100 
// Write the command line help, one entry per option, to stderr.
static void print_usage()
{
    static const char* const usage_lines[] = {
        "Usage: realsr-ncnn-vulkan -i infile -o outfile [options]...\n\n",
        "  -h                   show this help\n",
        "  -v                   verbose output\n",
        "  -i input-path        input image path (jpg/png/webp) or directory\n",
        "  -o output-path       output image path (jpg/png/webp) or directory\n",
        "  -s scale             upscale ratio (4, default=4)\n",
        "  -t tile-size         tile size (>=32/0=auto, default=0) can be 0,0,0 for multi-gpu\n",
        "  -m model-path        realsr model path (default=models-DF2K_JPEG)\n",
        "  -g gpu-id            gpu device to use (default=auto) can be 0,1,2 for multi-gpu\n",
        "  -j load:proc:save    thread count for load/proc/save (default=1:2:2) can be 1:2,2,2:2 for multi-gpu\n",
        "  -x                   enable tta mode\n",
        "  -f format            output image format (jpg/png/webp, default=ext/png)\n",
    };

    const int line_count = (int)(sizeof(usage_lines) / sizeof(usage_lines[0]));
    for (int i = 0; i < line_count; i++)
    {
        fputs(usage_lines[i], stderr);
    }
}
116 
117 class Task
118 {
119 public:
120     int id;
121     int webp;
122 
123     path_t inpath;
124     path_t outpath;
125 
126     ncnn::Mat inimage;
127     ncnn::Mat outimage;
128 };
129 
130 class TaskQueue
131 {
132 public:
TaskQueue()133     TaskQueue()
134     {
135     }
136 
put(const Task & v)137     void put(const Task& v)
138     {
139         lock.lock();
140 
141         while (tasks.size() >= 8) // FIXME hardcode queue length
142         {
143             condition.wait(lock);
144         }
145 
146         tasks.push(v);
147 
148         lock.unlock();
149 
150         condition.signal();
151     }
152 
get(Task & v)153     void get(Task& v)
154     {
155         lock.lock();
156 
157         while (tasks.size() == 0)
158         {
159             condition.wait(lock);
160         }
161 
162         v = tasks.front();
163         tasks.pop();
164 
165         lock.unlock();
166 
167         condition.signal();
168     }
169 
170 private:
171     ncnn::Mutex lock;
172     ncnn::ConditionVariable condition;
173     std::queue<Task> tasks;
174 };
175 
176 TaskQueue toproc;
177 TaskQueue tosave;
178 
179 class LoadThreadParams
180 {
181 public:
182     int scale;
183     int jobs_load;
184 
185     // session data
186     std::vector<path_t> input_files;
187     std::vector<path_t> output_files;
188 };
189 
load(void * args)190 void* load(void* args)
191 {
192     const LoadThreadParams* ltp = (const LoadThreadParams*)args;
193     const int count = ltp->input_files.size();
194     const int scale = ltp->scale;
195 
196     #pragma omp parallel for schedule(static,1) num_threads(ltp->jobs_load)
197     for (int i=0; i<count; i++)
198     {
199         const path_t& imagepath = ltp->input_files[i];
200 
201         int webp = 0;
202 
203         unsigned char* pixeldata = 0;
204         int w;
205         int h;
206         int c;
207 
208 #if _WIN32
209         FILE* fp = _wfopen(imagepath.c_str(), L"rb");
210 #else
211         FILE* fp = fopen(imagepath.c_str(), "rb");
212 #endif
213         if (fp)
214         {
215             // read whole file
216             unsigned char* filedata = 0;
217             int length = 0;
218             {
219                 fseek(fp, 0, SEEK_END);
220                 length = ftell(fp);
221                 rewind(fp);
222                 filedata = (unsigned char*)malloc(length);
223                 if (filedata)
224                 {
225                     fread(filedata, 1, length, fp);
226                 }
227                 fclose(fp);
228             }
229 
230             if (filedata)
231             {
232                 pixeldata = webp_load(filedata, length, &w, &h, &c);
233                 if (pixeldata)
234                 {
235                     webp = 1;
236                 }
237                 else
238                 {
239                     // not webp, try jpg png etc.
240 #if _WIN32
241                     pixeldata = wic_decode_image(imagepath.c_str(), &w, &h, &c);
242 #else // _WIN32
243                     pixeldata = stbi_load_from_memory(filedata, length, &w, &h, &c, 0);
244                     if (pixeldata)
245                     {
246                         // stb_image auto channel
247                         if (c == 1)
248                         {
249                             // grayscale -> rgb
250                             stbi_image_free(pixeldata);
251                             pixeldata = stbi_load_from_memory(filedata, length, &w, &h, &c, 3);
252                             c = 3;
253                         }
254                         else if (c == 2)
255                         {
256                             // grayscale + alpha -> rgba
257                             stbi_image_free(pixeldata);
258                             pixeldata = stbi_load_from_memory(filedata, length, &w, &h, &c, 4);
259                             c = 4;
260                         }
261                     }
262 #endif // _WIN32
263                 }
264 
265                 free(filedata);
266             }
267         }
268         if (pixeldata)
269         {
270             Task v;
271             v.id = i;
272             v.inpath = imagepath;
273             v.outpath = ltp->output_files[i];
274 
275             v.inimage = ncnn::Mat(w, h, (void*)pixeldata, (size_t)c, c);
276             v.outimage = ncnn::Mat(w * scale, h * scale, (size_t)c, c);
277 
278             path_t ext = get_file_extension(v.outpath);
279             if (c == 4 && (ext == PATHSTR("jpg") || ext == PATHSTR("JPG") || ext == PATHSTR("jpeg") || ext == PATHSTR("JPEG")))
280             {
281                 path_t output_filename2 = ltp->output_files[i] + PATHSTR(".png");
282                 v.outpath = output_filename2;
283 #if _WIN32
284                 fwprintf(stderr, L"image %ls has alpha channel ! %ls will output %ls\n", imagepath.c_str(), imagepath.c_str(), output_filename2.c_str());
285 #else // _WIN32
286                 fprintf(stderr, "image %s has alpha channel ! %s will output %s\n", imagepath.c_str(), imagepath.c_str(), output_filename2.c_str());
287 #endif // _WIN32
288             }
289 
290             toproc.put(v);
291         }
292         else
293         {
294 #if _WIN32
295             fwprintf(stderr, L"decode image %ls failed\n", imagepath.c_str());
296 #else // _WIN32
297             fprintf(stderr, "decode image %s failed\n", imagepath.c_str());
298 #endif // _WIN32
299         }
300     }
301 
302     return 0;
303 }
304 
305 class ProcThreadParams
306 {
307 public:
308     const RealSR* realsr;
309 };
310 
proc(void * args)311 void* proc(void* args)
312 {
313     const ProcThreadParams* ptp = (const ProcThreadParams*)args;
314     const RealSR* realsr = ptp->realsr;
315 
316     for (;;)
317     {
318         Task v;
319 
320         toproc.get(v);
321 
322         if (v.id == -233)
323             break;
324 
325         realsr->process(v.inimage, v.outimage);
326 
327         tosave.put(v);
328     }
329 
330     return 0;
331 }
332 
// Arguments handed to the save() thread entry point.
struct SaveThreadParams
{
    // non-zero makes save() report every successfully written image on stderr
    int verbose;
};
338 
save(void * args)339 void* save(void* args)
340 {
341     const SaveThreadParams* stp = (const SaveThreadParams*)args;
342     const int verbose = stp->verbose;
343 
344     for (;;)
345     {
346         Task v;
347 
348         tosave.get(v);
349 
350         if (v.id == -233)
351             break;
352 
353         // free input pixel data
354         {
355             unsigned char* pixeldata = (unsigned char*)v.inimage.data;
356             if (v.webp == 1)
357             {
358                 free(pixeldata);
359             }
360             else
361             {
362 #if _WIN32
363                 free(pixeldata);
364 #else
365                 stbi_image_free(pixeldata);
366 #endif
367             }
368         }
369 
370         int success = 0;
371 
372         path_t ext = get_file_extension(v.outpath);
373 
374         if (ext == PATHSTR("webp") || ext == PATHSTR("WEBP"))
375         {
376             success = webp_save(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, (const unsigned char*)v.outimage.data);
377         }
378         else if (ext == PATHSTR("png") || ext == PATHSTR("PNG"))
379         {
380 #if _WIN32
381             success = wic_encode_image(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, v.outimage.data);
382 #else
383             success = stbi_write_png(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, v.outimage.data, 0);
384 #endif
385         }
386         else if (ext == PATHSTR("jpg") || ext == PATHSTR("JPG") || ext == PATHSTR("jpeg") || ext == PATHSTR("JPEG"))
387         {
388 #if _WIN32
389             success = wic_encode_jpeg_image(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, v.outimage.data);
390 #else
391             success = stbi_write_jpg(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, v.outimage.data, 100);
392 #endif
393         }
394         if (success)
395         {
396             if (verbose)
397             {
398 #if _WIN32
399                 fwprintf(stderr, L"%ls -> %ls done\n", v.inpath.c_str(), v.outpath.c_str());
400 #else
401                 fprintf(stderr, "%s -> %s done\n", v.inpath.c_str(), v.outpath.c_str());
402 #endif
403             }
404         }
405         else
406         {
407 #if _WIN32
408             fwprintf(stderr, L"encode image %ls failed\n", v.outpath.c_str());
409 #else
410             fprintf(stderr, "encode image %s failed\n", v.outpath.c_str());
411 #endif
412         }
413     }
414 
415     return 0;
416 }
417 
418 
419 #if _WIN32
wmain(int argc,wchar_t ** argv)420 int wmain(int argc, wchar_t** argv)
421 #else
422 int main(int argc, char** argv)
423 #endif
424 {
425     path_t inputpath;
426     path_t outputpath;
427     int scale = 4;
428     std::vector<int> tilesize;
429     path_t model = PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K_JPEG");
430     std::vector<int> gpuid;
431     int jobs_load = 1;
432     std::vector<int> jobs_proc;
433     int jobs_save = 2;
434     int verbose = 0;
435     int tta_mode = 0;
436     path_t format = PATHSTR("png");
437 
438 #if _WIN32
439     setlocale(LC_ALL, "");
440     wchar_t opt;
441     while ((opt = getopt(argc, argv, L"i:o:s:t:m:g:j:f:vxh")) != (wchar_t)-1)
442     {
443         switch (opt)
444         {
445         case L'i':
446             inputpath = optarg;
447             break;
448         case L'o':
449             outputpath = optarg;
450             break;
451         case L's':
452             scale = _wtoi(optarg);
453             break;
454         case L't':
455             tilesize = parse_optarg_int_array(optarg);
456             break;
457         case L'm':
458             model = optarg;
459             break;
460         case L'g':
461             gpuid = parse_optarg_int_array(optarg);
462             break;
463         case L'j':
464             swscanf(optarg, L"%d:%*[^:]:%d", &jobs_load, &jobs_save);
465             jobs_proc = parse_optarg_int_array(wcschr(optarg, L':') + 1);
466             break;
467         case L'f':
468             format = optarg;
469             break;
470         case L'v':
471             verbose = 1;
472             break;
473         case L'x':
474             tta_mode = 1;
475             break;
476         case L'h':
477         default:
478             print_usage();
479             return -1;
480         }
481     }
482 #else // _WIN32
483     int opt;
484     while ((opt = getopt(argc, argv, "i:o:s:t:m:g:j:f:vxh")) != -1)
485     {
486         switch (opt)
487         {
488         case 'i':
489             inputpath = optarg;
490             break;
491         case 'o':
492             outputpath = optarg;
493             break;
494         case 's':
495             scale = atoi(optarg);
496             break;
497         case 't':
498             tilesize = parse_optarg_int_array(optarg);
499             break;
500         case 'm':
501             model = optarg;
502             break;
503         case 'g':
504             gpuid = parse_optarg_int_array(optarg);
505             break;
506         case 'j':
507             sscanf(optarg, "%d:%*[^:]:%d", &jobs_load, &jobs_save);
508             jobs_proc = parse_optarg_int_array(strchr(optarg, ':') + 1);
509             break;
510         case 'f':
511             format = optarg;
512             break;
513         case 'v':
514             verbose = 1;
515             break;
516         case 'x':
517             tta_mode = 1;
518             break;
519         case 'h':
520         default:
521             print_usage();
522             return -1;
523         }
524     }
525 #endif // _WIN32
526 
527     if (inputpath.empty() || outputpath.empty())
528     {
529         print_usage();
530         return -1;
531     }
532 
533     if (scale != 4)
534     {
535         fprintf(stderr, "invalid scale argument\n");
536         return -1;
537     }
538 
539     if (tilesize.size() != (gpuid.empty() ? 1 : gpuid.size()) && !tilesize.empty())
540     {
541         fprintf(stderr, "invalid tilesize argument\n");
542         return -1;
543     }
544 
545     for (int i=0; i<(int)tilesize.size(); i++)
546     {
547         if (tilesize[i] != 0 && tilesize[i] < 32)
548         {
549             fprintf(stderr, "invalid tilesize argument\n");
550             return -1;
551         }
552     }
553 
554     if (jobs_load < 1 || jobs_save < 1)
555     {
556         fprintf(stderr, "invalid thread count argument\n");
557         return -1;
558     }
559 
560     if (jobs_proc.size() != (gpuid.empty() ? 1 : gpuid.size()) && !jobs_proc.empty())
561     {
562         fprintf(stderr, "invalid jobs_proc thread count argument\n");
563         return -1;
564     }
565 
566     for (int i=0; i<(int)jobs_proc.size(); i++)
567     {
568         if (jobs_proc[i] < 1)
569         {
570             fprintf(stderr, "invalid jobs_proc thread count argument\n");
571             return -1;
572         }
573     }
574 
575     if (!path_is_directory(outputpath))
576     {
577         // guess format from outputpath no matter what format argument specified
578         path_t ext = get_file_extension(outputpath);
579 
580         if (ext == PATHSTR("png") || ext == PATHSTR("PNG"))
581         {
582             format = PATHSTR("png");
583         }
584         else if (ext == PATHSTR("webp") || ext == PATHSTR("WEBP"))
585         {
586             format = PATHSTR("webp");
587         }
588         else if (ext == PATHSTR("jpg") || ext == PATHSTR("JPG") || ext == PATHSTR("jpeg") || ext == PATHSTR("JPEG"))
589         {
590             format = PATHSTR("jpg");
591         }
592         else
593         {
594             fprintf(stderr, "invalid outputpath extension type\n");
595             return -1;
596         }
597     }
598 
599     if (format != PATHSTR("png") && format != PATHSTR("webp") && format != PATHSTR("jpg"))
600     {
601         fprintf(stderr, "invalid format argument\n");
602         return -1;
603     }
604 
605     // collect input and output filepath
606     std::vector<path_t> input_files;
607     std::vector<path_t> output_files;
608     {
609         if (path_is_directory(inputpath) && path_is_directory(outputpath))
610         {
611             std::vector<path_t> filenames;
612             int lr = list_directory(inputpath, filenames);
613             if (lr != 0)
614                 return -1;
615 
616             const int count = filenames.size();
617             input_files.resize(count);
618             output_files.resize(count);
619 
620             path_t last_filename;
621             path_t last_filename_noext;
622             for (int i=0; i<count; i++)
623             {
624                 path_t filename = filenames[i];
625                 path_t filename_noext = get_file_name_without_extension(filename);
626                 path_t output_filename = filename_noext + PATHSTR('.') + format;
627 
628                 // filename list is sorted, check if output image path conflicts
629                 if (filename_noext == last_filename_noext)
630                 {
631                     path_t output_filename2 = filename + PATHSTR('.') + format;
632 #if _WIN32
633                     fwprintf(stderr, L"both %ls and %ls output %ls ! %ls will output %ls\n", filename.c_str(), last_filename.c_str(), output_filename.c_str(), filename.c_str(), output_filename2.c_str());
634 #else
635                     fprintf(stderr, "both %s and %s output %s ! %s will output %s\n", filename.c_str(), last_filename.c_str(), output_filename.c_str(), filename.c_str(), output_filename2.c_str());
636 #endif
637                     output_filename = output_filename2;
638                 }
639                 else
640                 {
641                     last_filename = filename;
642                     last_filename_noext = filename_noext;
643                 }
644 
645                 input_files[i] = inputpath + PATHSTR('/') + filename;
646                 output_files[i] = outputpath + PATHSTR('/') + output_filename;
647             }
648         }
649         else if (!path_is_directory(inputpath) && !path_is_directory(outputpath))
650         {
651             input_files.push_back(inputpath);
652             output_files.push_back(outputpath);
653         }
654         else
655         {
656             fprintf(stderr, "inputpath and outputpath must be either file or directory at the same time\n");
657             return -1;
658         }
659     }
660 
661     int prepadding = 0;
662 
663     if (model.find(PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K")) != path_t::npos
664         || model.find(PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K_JPEG")) != path_t::npos)
665     {
666         prepadding = 10;
667     }
668     else
669     {
670         fprintf(stderr, "unknown model dir type\n");
671         return -1;
672     }
673 
674 #if _WIN32
675     wchar_t parampath[256];
676     wchar_t modelpath[256];
677     if (scale == 4)
678     {
679         swprintf(parampath, 256, L"%s/x4.param", model.c_str());
680         swprintf(modelpath, 256, L"%s/x4.bin", model.c_str());
681     }
682 #else
683     char parampath[256];
684     char modelpath[256];
685     if (scale == 4)
686     {
687         sprintf(parampath, "%s/x4.param", model.c_str());
688         sprintf(modelpath, "%s/x4.bin", model.c_str());
689     }
690 #endif
691 
692     path_t paramfullpath = sanitize_filepath(parampath);
693     path_t modelfullpath = sanitize_filepath(modelpath);
694 
695 #if _WIN32
696     CoInitializeEx(NULL, COINIT_MULTITHREADED);
697 #endif
698 
699     ncnn::create_gpu_instance();
700 
701     if (gpuid.empty())
702     {
703         gpuid.push_back(ncnn::get_default_gpu_index());
704     }
705 
706     const int use_gpu_count = (int)gpuid.size();
707 
708     if (jobs_proc.empty())
709     {
710         jobs_proc.resize(use_gpu_count, 2);
711     }
712 
713     if (tilesize.empty())
714     {
715         tilesize.resize(use_gpu_count, 0);
716     }
717 
718     int cpu_count = std::max(1, ncnn::get_cpu_count());
719     jobs_load = std::min(jobs_load, cpu_count);
720     jobs_save = std::min(jobs_save, cpu_count);
721 
722     int gpu_count = ncnn::get_gpu_count();
723     for (int i=0; i<use_gpu_count; i++)
724     {
725         if (gpuid[i] < 0 || gpuid[i] >= gpu_count)
726         {
727             fprintf(stderr, "invalid gpu device\n");
728 
729             ncnn::destroy_gpu_instance();
730             return -1;
731         }
732     }
733 
734     int total_jobs_proc = 0;
735     for (int i=0; i<use_gpu_count; i++)
736     {
737         int gpu_queue_count = ncnn::get_gpu_info(gpuid[i]).compute_queue_count();
738         jobs_proc[i] = std::min(jobs_proc[i], gpu_queue_count);
739         total_jobs_proc += jobs_proc[i];
740     }
741 
742     for (int i=0; i<use_gpu_count; i++)
743     {
744         if (tilesize[i] != 0)
745             continue;
746 
747         uint32_t heap_budget = ncnn::get_gpu_device(gpuid[i])->get_heap_budget();
748 
749         // more fine-grained tilesize policy here
750         if (model.find(PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K")) != path_t::npos
751             || model.find(PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K_JPEG")) != path_t::npos)
752         {
753             if (heap_budget > 1900)
754                 tilesize[i] = 200;
755             else if (heap_budget > 550)
756                 tilesize[i] = 100;
757             else if (heap_budget > 190)
758                 tilesize[i] = 64;
759             else
760                 tilesize[i] = 32;
761         }
762     }
763 
764     {
765         std::vector<RealSR*> realsr(use_gpu_count);
766 
767         for (int i=0; i<use_gpu_count; i++)
768         {
769             realsr[i] = new RealSR(gpuid[i], tta_mode);
770 
771             realsr[i]->load(paramfullpath, modelfullpath);
772 
773             realsr[i]->scale = scale;
774             realsr[i]->tilesize = tilesize[i];
775             realsr[i]->prepadding = prepadding;
776         }
777 
778         // main routine
779         {
780             // load image
781             LoadThreadParams ltp;
782             ltp.scale = scale;
783             ltp.jobs_load = jobs_load;
784             ltp.input_files = input_files;
785             ltp.output_files = output_files;
786 
787             ncnn::Thread load_thread(load, (void*)&ltp);
788 
789             // realsr proc
790             std::vector<ProcThreadParams> ptp(use_gpu_count);
791             for (int i=0; i<use_gpu_count; i++)
792             {
793                 ptp[i].realsr = realsr[i];
794             }
795 
796             std::vector<ncnn::Thread*> proc_threads(total_jobs_proc);
797             {
798                 int total_jobs_proc_id = 0;
799                 for (int i=0; i<use_gpu_count; i++)
800                 {
801                     for (int j=0; j<jobs_proc[i]; j++)
802                     {
803                         proc_threads[total_jobs_proc_id++] = new ncnn::Thread(proc, (void*)&ptp[i]);
804                     }
805                 }
806             }
807 
808             // save image
809             SaveThreadParams stp;
810             stp.verbose = verbose;
811 
812             std::vector<ncnn::Thread*> save_threads(jobs_save);
813             for (int i=0; i<jobs_save; i++)
814             {
815                 save_threads[i] = new ncnn::Thread(save, (void*)&stp);
816             }
817 
818             // end
819             load_thread.join();
820 
821             Task end;
822             end.id = -233;
823 
824             for (int i=0; i<total_jobs_proc; i++)
825             {
826                 toproc.put(end);
827             }
828 
829             for (int i=0; i<total_jobs_proc; i++)
830             {
831                 proc_threads[i]->join();
832                 delete proc_threads[i];
833             }
834 
835             for (int i=0; i<jobs_save; i++)
836             {
837                 tosave.put(end);
838             }
839 
840             for (int i=0; i<jobs_save; i++)
841             {
842                 save_threads[i]->join();
843                 delete save_threads[i];
844             }
845         }
846 
847         for (int i=0; i<use_gpu_count; i++)
848         {
849             delete realsr[i];
850         }
851         realsr.clear();
852     }
853 
854     ncnn::destroy_gpu_instance();
855 
856     return 0;
857 }
858