1 // realsr implemented with ncnn library
2
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <clocale>
#include <queue>
#include <vector>
8
9 #if _WIN32
10 // image decoder and encoder with wic
11 #include "wic_image.h"
12 #else // _WIN32
13 // image decoder and encoder with stb
14 #define STB_IMAGE_IMPLEMENTATION
15 #define STBI_NO_PSD
16 #define STBI_NO_TGA
17 #define STBI_NO_GIF
18 #define STBI_NO_HDR
19 #define STBI_NO_PIC
20 #define STBI_NO_STDIO
21 #include "stb_image.h"
22 #define STB_IMAGE_WRITE_IMPLEMENTATION
23 #include "stb_image_write.h"
24 #endif // _WIN32
25 #include "webp_image.h"
26
27 #if _WIN32
28 #include <wchar.h>
// State of the minimal wide-character getopt() below.
// optarg: argument of the option returned by the last call, NULL when it has none.
// optind: index of the next argv element to examine.
static wchar_t* optarg = NULL;
static int optind = 1;

// Parse the next single-letter option ("-x") from argv.
//
// optstring lists the accepted option characters; a character followed by ':'
// takes an argument, which is read from the following argv element.
//
// Returns the option character on success, L'?' for an unknown option or a
// missing argument, and (wchar_t)-1 when argv[optind] is not an option
// (end of argv, an element not starting with '-', or a bare "-").
static wchar_t getopt(int argc, wchar_t* const argv[], const wchar_t* optstring)
{
    // never leave a stale argument from a previous call visible
    optarg = NULL;

    if (optind >= argc || argv[optind][0] != L'-')
        return (wchar_t)-1;

    const wchar_t opt = argv[optind][1];

    // a bare "-" is an operand; looking up L'\0' with wcschr would match the
    // terminator of optstring and make p[1] read past the end of it
    if (opt == L'\0')
        return (wchar_t)-1;

    // ':' is only the "takes an argument" marker, never an option itself
    const wchar_t* p = (opt == L':') ? NULL : wcschr(optstring, opt);
    if (p == NULL)
        return L'?';

    if (p[1] == L':')
    {
        // the option argument is the next argv element
        optind++;
        if (optind >= argc)
            return L'?';

        optarg = argv[optind];
    }

    optind++;

    return opt;
}
56
// Parse a comma separated list of decimal integers such as L"0,1,2".
//
// Every comma separated field yields one element; a field that does not start
// with a number yields 0 (so L"" gives { 0 } and L"1,,3" gives { 1, 0, 3 }).
// Parsing of a field stops at the first character that is not part of the
// number, which lets callers pass text like L"2,2:2".
// Values outside the int range are clamped to INT_MIN / INT_MAX.
// A NULL optarg yields an empty vector.
static std::vector<int> parse_optarg_int_array(const wchar_t* optarg)
{
    std::vector<int> array;
    if (optarg == NULL)
        return array;

    const wchar_t* p = optarg;
    while (p != NULL)
    {
        // wcstol is standard and saturates on overflow
        long value = wcstol(p, NULL, 10);
        value = std::max<long>(INT_MIN, std::min<long>(value, INT_MAX));
        array.push_back((int)value);

        // continue right after the next comma, if there is one
        p = wcschr(p, L',');
        if (p != NULL)
            p++;
    }

    return array;
}
72 #else // _WIN32
73 #include <unistd.h> // getopt()
74
// Parse a comma separated list of decimal integers such as "0,1,2".
//
// Every comma separated field yields one element; a field that does not start
// with a number yields 0 (so "" gives { 0 } and "1,,3" gives { 1, 0, 3 }).
// Parsing of a field stops at the first character that is not part of the
// number, which lets callers pass text like "2,2:2".
// Values outside the int range are clamped to INT_MIN / INT_MAX.
// A NULL optarg yields an empty vector.
static std::vector<int> parse_optarg_int_array(const char* optarg)
{
    std::vector<int> array;
    if (optarg == NULL)
        return array;

    const char* p = optarg;
    while (p != NULL)
    {
        // strtol instead of atoi: atoi has undefined behavior on overflow
        long value = strtol(p, NULL, 10);
        value = std::max<long>(INT_MIN, std::min<long>(value, INT_MAX));
        array.push_back((int)value);

        // continue right after the next comma, if there is one
        p = strchr(p, ',');
        if (p != NULL)
            p++;
    }

    return array;
}
90 #endif // _WIN32
91
92 // ncnn
93 #include "cpu.h"
94 #include "gpu.h"
95 #include "platform.h"
96
97 #include "realsr.h"
98
99 #include "filesystem_utils.h"
100
// Write the command line help text to stderr, one table entry per output line.
static void print_usage()
{
    static const char* const usage_lines[] = {
        "Usage: realsr-ncnn-vulkan -i infile -o outfile [options]...\n\n",
        " -h show this help\n",
        " -v verbose output\n",
        " -i input-path input image path (jpg/png/webp) or directory\n",
        " -o output-path output image path (jpg/png/webp) or directory\n",
        " -s scale upscale ratio (4, default=4)\n",
        " -t tile-size tile size (>=32/0=auto, default=0) can be 0,0,0 for multi-gpu\n",
        " -m model-path realsr model path (default=models-DF2K_JPEG)\n",
        " -g gpu-id gpu device to use (default=auto) can be 0,1,2 for multi-gpu\n",
        " -j load:proc:save thread count for load/proc/save (default=1:2:2) can be 1:2,2,2:2 for multi-gpu\n",
        " -x enable tta mode\n",
        " -f format output image format (jpg/png/webp, default=ext/png)\n",
    };

    const size_t line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);
    for (size_t n = 0; n < line_count; n++)
    {
        fputs(usage_lines[n], stderr);
    }
}
116
117 class Task
118 {
119 public:
120 int id;
121 int webp;
122
123 path_t inpath;
124 path_t outpath;
125
126 ncnn::Mat inimage;
127 ncnn::Mat outimage;
128 };
129
130 class TaskQueue
131 {
132 public:
TaskQueue()133 TaskQueue()
134 {
135 }
136
put(const Task & v)137 void put(const Task& v)
138 {
139 lock.lock();
140
141 while (tasks.size() >= 8) // FIXME hardcode queue length
142 {
143 condition.wait(lock);
144 }
145
146 tasks.push(v);
147
148 lock.unlock();
149
150 condition.signal();
151 }
152
get(Task & v)153 void get(Task& v)
154 {
155 lock.lock();
156
157 while (tasks.size() == 0)
158 {
159 condition.wait(lock);
160 }
161
162 v = tasks.front();
163 tasks.pop();
164
165 lock.unlock();
166
167 condition.signal();
168 }
169
170 private:
171 ncnn::Mutex lock;
172 ncnn::ConditionVariable condition;
173 std::queue<Task> tasks;
174 };
175
176 TaskQueue toproc;
177 TaskQueue tosave;
178
179 class LoadThreadParams
180 {
181 public:
182 int scale;
183 int jobs_load;
184
185 // session data
186 std::vector<path_t> input_files;
187 std::vector<path_t> output_files;
188 };
189
load(void * args)190 void* load(void* args)
191 {
192 const LoadThreadParams* ltp = (const LoadThreadParams*)args;
193 const int count = ltp->input_files.size();
194 const int scale = ltp->scale;
195
196 #pragma omp parallel for schedule(static,1) num_threads(ltp->jobs_load)
197 for (int i=0; i<count; i++)
198 {
199 const path_t& imagepath = ltp->input_files[i];
200
201 int webp = 0;
202
203 unsigned char* pixeldata = 0;
204 int w;
205 int h;
206 int c;
207
208 #if _WIN32
209 FILE* fp = _wfopen(imagepath.c_str(), L"rb");
210 #else
211 FILE* fp = fopen(imagepath.c_str(), "rb");
212 #endif
213 if (fp)
214 {
215 // read whole file
216 unsigned char* filedata = 0;
217 int length = 0;
218 {
219 fseek(fp, 0, SEEK_END);
220 length = ftell(fp);
221 rewind(fp);
222 filedata = (unsigned char*)malloc(length);
223 if (filedata)
224 {
225 fread(filedata, 1, length, fp);
226 }
227 fclose(fp);
228 }
229
230 if (filedata)
231 {
232 pixeldata = webp_load(filedata, length, &w, &h, &c);
233 if (pixeldata)
234 {
235 webp = 1;
236 }
237 else
238 {
239 // not webp, try jpg png etc.
240 #if _WIN32
241 pixeldata = wic_decode_image(imagepath.c_str(), &w, &h, &c);
242 #else // _WIN32
243 pixeldata = stbi_load_from_memory(filedata, length, &w, &h, &c, 0);
244 if (pixeldata)
245 {
246 // stb_image auto channel
247 if (c == 1)
248 {
249 // grayscale -> rgb
250 stbi_image_free(pixeldata);
251 pixeldata = stbi_load_from_memory(filedata, length, &w, &h, &c, 3);
252 c = 3;
253 }
254 else if (c == 2)
255 {
256 // grayscale + alpha -> rgba
257 stbi_image_free(pixeldata);
258 pixeldata = stbi_load_from_memory(filedata, length, &w, &h, &c, 4);
259 c = 4;
260 }
261 }
262 #endif // _WIN32
263 }
264
265 free(filedata);
266 }
267 }
268 if (pixeldata)
269 {
270 Task v;
271 v.id = i;
272 v.inpath = imagepath;
273 v.outpath = ltp->output_files[i];
274
275 v.inimage = ncnn::Mat(w, h, (void*)pixeldata, (size_t)c, c);
276 v.outimage = ncnn::Mat(w * scale, h * scale, (size_t)c, c);
277
278 path_t ext = get_file_extension(v.outpath);
279 if (c == 4 && (ext == PATHSTR("jpg") || ext == PATHSTR("JPG") || ext == PATHSTR("jpeg") || ext == PATHSTR("JPEG")))
280 {
281 path_t output_filename2 = ltp->output_files[i] + PATHSTR(".png");
282 v.outpath = output_filename2;
283 #if _WIN32
284 fwprintf(stderr, L"image %ls has alpha channel ! %ls will output %ls\n", imagepath.c_str(), imagepath.c_str(), output_filename2.c_str());
285 #else // _WIN32
286 fprintf(stderr, "image %s has alpha channel ! %s will output %s\n", imagepath.c_str(), imagepath.c_str(), output_filename2.c_str());
287 #endif // _WIN32
288 }
289
290 toproc.put(v);
291 }
292 else
293 {
294 #if _WIN32
295 fwprintf(stderr, L"decode image %ls failed\n", imagepath.c_str());
296 #else // _WIN32
297 fprintf(stderr, "decode image %s failed\n", imagepath.c_str());
298 #endif // _WIN32
299 }
300 }
301
302 return 0;
303 }
304
305 class ProcThreadParams
306 {
307 public:
308 const RealSR* realsr;
309 };
310
proc(void * args)311 void* proc(void* args)
312 {
313 const ProcThreadParams* ptp = (const ProcThreadParams*)args;
314 const RealSR* realsr = ptp->realsr;
315
316 for (;;)
317 {
318 Task v;
319
320 toproc.get(v);
321
322 if (v.id == -233)
323 break;
324
325 realsr->process(v.inimage, v.outimage);
326
327 tosave.put(v);
328 }
329
330 return 0;
331 }
332
// Arguments handed to save() through its void* parameter.
struct SaveThreadParams
{
    // non-zero: report every successfully written image on stderr
    int verbose;
};
338
save(void * args)339 void* save(void* args)
340 {
341 const SaveThreadParams* stp = (const SaveThreadParams*)args;
342 const int verbose = stp->verbose;
343
344 for (;;)
345 {
346 Task v;
347
348 tosave.get(v);
349
350 if (v.id == -233)
351 break;
352
353 // free input pixel data
354 {
355 unsigned char* pixeldata = (unsigned char*)v.inimage.data;
356 if (v.webp == 1)
357 {
358 free(pixeldata);
359 }
360 else
361 {
362 #if _WIN32
363 free(pixeldata);
364 #else
365 stbi_image_free(pixeldata);
366 #endif
367 }
368 }
369
370 int success = 0;
371
372 path_t ext = get_file_extension(v.outpath);
373
374 if (ext == PATHSTR("webp") || ext == PATHSTR("WEBP"))
375 {
376 success = webp_save(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, (const unsigned char*)v.outimage.data);
377 }
378 else if (ext == PATHSTR("png") || ext == PATHSTR("PNG"))
379 {
380 #if _WIN32
381 success = wic_encode_image(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, v.outimage.data);
382 #else
383 success = stbi_write_png(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, v.outimage.data, 0);
384 #endif
385 }
386 else if (ext == PATHSTR("jpg") || ext == PATHSTR("JPG") || ext == PATHSTR("jpeg") || ext == PATHSTR("JPEG"))
387 {
388 #if _WIN32
389 success = wic_encode_jpeg_image(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, v.outimage.data);
390 #else
391 success = stbi_write_jpg(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, v.outimage.data, 100);
392 #endif
393 }
394 if (success)
395 {
396 if (verbose)
397 {
398 #if _WIN32
399 fwprintf(stderr, L"%ls -> %ls done\n", v.inpath.c_str(), v.outpath.c_str());
400 #else
401 fprintf(stderr, "%s -> %s done\n", v.inpath.c_str(), v.outpath.c_str());
402 #endif
403 }
404 }
405 else
406 {
407 #if _WIN32
408 fwprintf(stderr, L"encode image %ls failed\n", v.outpath.c_str());
409 #else
410 fprintf(stderr, "encode image %s failed\n", v.outpath.c_str());
411 #endif
412 }
413 }
414
415 return 0;
416 }
417
418
419 #if _WIN32
wmain(int argc,wchar_t ** argv)420 int wmain(int argc, wchar_t** argv)
421 #else
422 int main(int argc, char** argv)
423 #endif
424 {
425 path_t inputpath;
426 path_t outputpath;
427 int scale = 4;
428 std::vector<int> tilesize;
429 path_t model = PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K_JPEG");
430 std::vector<int> gpuid;
431 int jobs_load = 1;
432 std::vector<int> jobs_proc;
433 int jobs_save = 2;
434 int verbose = 0;
435 int tta_mode = 0;
436 path_t format = PATHSTR("png");
437
438 #if _WIN32
439 setlocale(LC_ALL, "");
440 wchar_t opt;
441 while ((opt = getopt(argc, argv, L"i:o:s:t:m:g:j:f:vxh")) != (wchar_t)-1)
442 {
443 switch (opt)
444 {
445 case L'i':
446 inputpath = optarg;
447 break;
448 case L'o':
449 outputpath = optarg;
450 break;
451 case L's':
452 scale = _wtoi(optarg);
453 break;
454 case L't':
455 tilesize = parse_optarg_int_array(optarg);
456 break;
457 case L'm':
458 model = optarg;
459 break;
460 case L'g':
461 gpuid = parse_optarg_int_array(optarg);
462 break;
463 case L'j':
464 swscanf(optarg, L"%d:%*[^:]:%d", &jobs_load, &jobs_save);
465 jobs_proc = parse_optarg_int_array(wcschr(optarg, L':') + 1);
466 break;
467 case L'f':
468 format = optarg;
469 break;
470 case L'v':
471 verbose = 1;
472 break;
473 case L'x':
474 tta_mode = 1;
475 break;
476 case L'h':
477 default:
478 print_usage();
479 return -1;
480 }
481 }
482 #else // _WIN32
483 int opt;
484 while ((opt = getopt(argc, argv, "i:o:s:t:m:g:j:f:vxh")) != -1)
485 {
486 switch (opt)
487 {
488 case 'i':
489 inputpath = optarg;
490 break;
491 case 'o':
492 outputpath = optarg;
493 break;
494 case 's':
495 scale = atoi(optarg);
496 break;
497 case 't':
498 tilesize = parse_optarg_int_array(optarg);
499 break;
500 case 'm':
501 model = optarg;
502 break;
503 case 'g':
504 gpuid = parse_optarg_int_array(optarg);
505 break;
506 case 'j':
507 sscanf(optarg, "%d:%*[^:]:%d", &jobs_load, &jobs_save);
508 jobs_proc = parse_optarg_int_array(strchr(optarg, ':') + 1);
509 break;
510 case 'f':
511 format = optarg;
512 break;
513 case 'v':
514 verbose = 1;
515 break;
516 case 'x':
517 tta_mode = 1;
518 break;
519 case 'h':
520 default:
521 print_usage();
522 return -1;
523 }
524 }
525 #endif // _WIN32
526
527 if (inputpath.empty() || outputpath.empty())
528 {
529 print_usage();
530 return -1;
531 }
532
533 if (scale != 4)
534 {
535 fprintf(stderr, "invalid scale argument\n");
536 return -1;
537 }
538
539 if (tilesize.size() != (gpuid.empty() ? 1 : gpuid.size()) && !tilesize.empty())
540 {
541 fprintf(stderr, "invalid tilesize argument\n");
542 return -1;
543 }
544
545 for (int i=0; i<(int)tilesize.size(); i++)
546 {
547 if (tilesize[i] != 0 && tilesize[i] < 32)
548 {
549 fprintf(stderr, "invalid tilesize argument\n");
550 return -1;
551 }
552 }
553
554 if (jobs_load < 1 || jobs_save < 1)
555 {
556 fprintf(stderr, "invalid thread count argument\n");
557 return -1;
558 }
559
560 if (jobs_proc.size() != (gpuid.empty() ? 1 : gpuid.size()) && !jobs_proc.empty())
561 {
562 fprintf(stderr, "invalid jobs_proc thread count argument\n");
563 return -1;
564 }
565
566 for (int i=0; i<(int)jobs_proc.size(); i++)
567 {
568 if (jobs_proc[i] < 1)
569 {
570 fprintf(stderr, "invalid jobs_proc thread count argument\n");
571 return -1;
572 }
573 }
574
575 if (!path_is_directory(outputpath))
576 {
577 // guess format from outputpath no matter what format argument specified
578 path_t ext = get_file_extension(outputpath);
579
580 if (ext == PATHSTR("png") || ext == PATHSTR("PNG"))
581 {
582 format = PATHSTR("png");
583 }
584 else if (ext == PATHSTR("webp") || ext == PATHSTR("WEBP"))
585 {
586 format = PATHSTR("webp");
587 }
588 else if (ext == PATHSTR("jpg") || ext == PATHSTR("JPG") || ext == PATHSTR("jpeg") || ext == PATHSTR("JPEG"))
589 {
590 format = PATHSTR("jpg");
591 }
592 else
593 {
594 fprintf(stderr, "invalid outputpath extension type\n");
595 return -1;
596 }
597 }
598
599 if (format != PATHSTR("png") && format != PATHSTR("webp") && format != PATHSTR("jpg"))
600 {
601 fprintf(stderr, "invalid format argument\n");
602 return -1;
603 }
604
605 // collect input and output filepath
606 std::vector<path_t> input_files;
607 std::vector<path_t> output_files;
608 {
609 if (path_is_directory(inputpath) && path_is_directory(outputpath))
610 {
611 std::vector<path_t> filenames;
612 int lr = list_directory(inputpath, filenames);
613 if (lr != 0)
614 return -1;
615
616 const int count = filenames.size();
617 input_files.resize(count);
618 output_files.resize(count);
619
620 path_t last_filename;
621 path_t last_filename_noext;
622 for (int i=0; i<count; i++)
623 {
624 path_t filename = filenames[i];
625 path_t filename_noext = get_file_name_without_extension(filename);
626 path_t output_filename = filename_noext + PATHSTR('.') + format;
627
628 // filename list is sorted, check if output image path conflicts
629 if (filename_noext == last_filename_noext)
630 {
631 path_t output_filename2 = filename + PATHSTR('.') + format;
632 #if _WIN32
633 fwprintf(stderr, L"both %ls and %ls output %ls ! %ls will output %ls\n", filename.c_str(), last_filename.c_str(), output_filename.c_str(), filename.c_str(), output_filename2.c_str());
634 #else
635 fprintf(stderr, "both %s and %s output %s ! %s will output %s\n", filename.c_str(), last_filename.c_str(), output_filename.c_str(), filename.c_str(), output_filename2.c_str());
636 #endif
637 output_filename = output_filename2;
638 }
639 else
640 {
641 last_filename = filename;
642 last_filename_noext = filename_noext;
643 }
644
645 input_files[i] = inputpath + PATHSTR('/') + filename;
646 output_files[i] = outputpath + PATHSTR('/') + output_filename;
647 }
648 }
649 else if (!path_is_directory(inputpath) && !path_is_directory(outputpath))
650 {
651 input_files.push_back(inputpath);
652 output_files.push_back(outputpath);
653 }
654 else
655 {
656 fprintf(stderr, "inputpath and outputpath must be either file or directory at the same time\n");
657 return -1;
658 }
659 }
660
661 int prepadding = 0;
662
663 if (model.find(PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K")) != path_t::npos
664 || model.find(PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K_JPEG")) != path_t::npos)
665 {
666 prepadding = 10;
667 }
668 else
669 {
670 fprintf(stderr, "unknown model dir type\n");
671 return -1;
672 }
673
674 #if _WIN32
675 wchar_t parampath[256];
676 wchar_t modelpath[256];
677 if (scale == 4)
678 {
679 swprintf(parampath, 256, L"%s/x4.param", model.c_str());
680 swprintf(modelpath, 256, L"%s/x4.bin", model.c_str());
681 }
682 #else
683 char parampath[256];
684 char modelpath[256];
685 if (scale == 4)
686 {
687 sprintf(parampath, "%s/x4.param", model.c_str());
688 sprintf(modelpath, "%s/x4.bin", model.c_str());
689 }
690 #endif
691
692 path_t paramfullpath = sanitize_filepath(parampath);
693 path_t modelfullpath = sanitize_filepath(modelpath);
694
695 #if _WIN32
696 CoInitializeEx(NULL, COINIT_MULTITHREADED);
697 #endif
698
699 ncnn::create_gpu_instance();
700
701 if (gpuid.empty())
702 {
703 gpuid.push_back(ncnn::get_default_gpu_index());
704 }
705
706 const int use_gpu_count = (int)gpuid.size();
707
708 if (jobs_proc.empty())
709 {
710 jobs_proc.resize(use_gpu_count, 2);
711 }
712
713 if (tilesize.empty())
714 {
715 tilesize.resize(use_gpu_count, 0);
716 }
717
718 int cpu_count = std::max(1, ncnn::get_cpu_count());
719 jobs_load = std::min(jobs_load, cpu_count);
720 jobs_save = std::min(jobs_save, cpu_count);
721
722 int gpu_count = ncnn::get_gpu_count();
723 for (int i=0; i<use_gpu_count; i++)
724 {
725 if (gpuid[i] < 0 || gpuid[i] >= gpu_count)
726 {
727 fprintf(stderr, "invalid gpu device\n");
728
729 ncnn::destroy_gpu_instance();
730 return -1;
731 }
732 }
733
734 int total_jobs_proc = 0;
735 for (int i=0; i<use_gpu_count; i++)
736 {
737 int gpu_queue_count = ncnn::get_gpu_info(gpuid[i]).compute_queue_count();
738 jobs_proc[i] = std::min(jobs_proc[i], gpu_queue_count);
739 total_jobs_proc += jobs_proc[i];
740 }
741
742 for (int i=0; i<use_gpu_count; i++)
743 {
744 if (tilesize[i] != 0)
745 continue;
746
747 uint32_t heap_budget = ncnn::get_gpu_device(gpuid[i])->get_heap_budget();
748
749 // more fine-grained tilesize policy here
750 if (model.find(PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K")) != path_t::npos
751 || model.find(PATHSTR("/usr/local/share/realsr-ncnn-vulkan/models-DF2K_JPEG")) != path_t::npos)
752 {
753 if (heap_budget > 1900)
754 tilesize[i] = 200;
755 else if (heap_budget > 550)
756 tilesize[i] = 100;
757 else if (heap_budget > 190)
758 tilesize[i] = 64;
759 else
760 tilesize[i] = 32;
761 }
762 }
763
764 {
765 std::vector<RealSR*> realsr(use_gpu_count);
766
767 for (int i=0; i<use_gpu_count; i++)
768 {
769 realsr[i] = new RealSR(gpuid[i], tta_mode);
770
771 realsr[i]->load(paramfullpath, modelfullpath);
772
773 realsr[i]->scale = scale;
774 realsr[i]->tilesize = tilesize[i];
775 realsr[i]->prepadding = prepadding;
776 }
777
778 // main routine
779 {
780 // load image
781 LoadThreadParams ltp;
782 ltp.scale = scale;
783 ltp.jobs_load = jobs_load;
784 ltp.input_files = input_files;
785 ltp.output_files = output_files;
786
787 ncnn::Thread load_thread(load, (void*)<p);
788
789 // realsr proc
790 std::vector<ProcThreadParams> ptp(use_gpu_count);
791 for (int i=0; i<use_gpu_count; i++)
792 {
793 ptp[i].realsr = realsr[i];
794 }
795
796 std::vector<ncnn::Thread*> proc_threads(total_jobs_proc);
797 {
798 int total_jobs_proc_id = 0;
799 for (int i=0; i<use_gpu_count; i++)
800 {
801 for (int j=0; j<jobs_proc[i]; j++)
802 {
803 proc_threads[total_jobs_proc_id++] = new ncnn::Thread(proc, (void*)&ptp[i]);
804 }
805 }
806 }
807
808 // save image
809 SaveThreadParams stp;
810 stp.verbose = verbose;
811
812 std::vector<ncnn::Thread*> save_threads(jobs_save);
813 for (int i=0; i<jobs_save; i++)
814 {
815 save_threads[i] = new ncnn::Thread(save, (void*)&stp);
816 }
817
818 // end
819 load_thread.join();
820
821 Task end;
822 end.id = -233;
823
824 for (int i=0; i<total_jobs_proc; i++)
825 {
826 toproc.put(end);
827 }
828
829 for (int i=0; i<total_jobs_proc; i++)
830 {
831 proc_threads[i]->join();
832 delete proc_threads[i];
833 }
834
835 for (int i=0; i<jobs_save; i++)
836 {
837 tosave.put(end);
838 }
839
840 for (int i=0; i<jobs_save; i++)
841 {
842 save_threads[i]->join();
843 delete save_threads[i];
844 }
845 }
846
847 for (int i=0; i<use_gpu_count; i++)
848 {
849 delete realsr[i];
850 }
851 realsr.clear();
852 }
853
854 ncnn::destroy_gpu_instance();
855
856 return 0;
857 }
858