1 // z.lib example code for tile-based threading.
2 //
3 // Example code demonstrates the use of z.lib to scale a single image by
4 // dividing the output into tiles. For processing multiple images, it is
5 // recommended to use frame-based threading for higher efficiency.
6
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iostream>
#include <memory>
#include <mutex>
#include <new>
#include <stdexcept>
#include <system_error>
#include <thread>
#include <utility>
#include <vector>
17
18 #include <zimg++.hpp>
19
20 #include "aligned_malloc.h"
21 #include "argparse.h"
22 #include "win32_bitmap.h"
23
24 #if ZIMG_API_VERSION < ZIMG_MAKE_API_VERSION(2, 1)
25 #error API 2.1 required
26 #endif
27
28 namespace {
29
// Command-line settings for the example. The fields are written by the
// argument parser through the offsets recorded in the option tables, so the
// struct must stay a plain aggregate usable with offsetof.
struct Arguments {
    // Positional arguments.
    const char *inpath;        // input path
    const char *outpath;       // output path
    unsigned int out_w;        // width of the output image
    unsigned int out_h;        // height of the output image

    // Optional switches.
    unsigned int tile_width;   // tile width
    unsigned int tile_height;  // tile height
    unsigned int threads;      // number of threads; zero when not specified
    char interactive;          // non-zero selects interactive mode

    // Not referenced by the option tables in this file.
    char opt;
};
41
42 const ArgparseOption program_switches[] = {
43 { OPTION_UINT, nullptr, "tile-width", offsetof(Arguments, tile_width), nullptr, "tile width" },
44 { OPTION_UINT, nullptr, "tile-height", offsetof(Arguments, tile_height), nullptr, "tile height" },
45 { OPTION_UINT, nullptr, "threads", offsetof(Arguments, threads), nullptr, "number of threads" },
46 { OPTION_FLAG, "i", "interactive", offsetof(Arguments, interactive), nullptr, "interactive mode" },
47 { OPTION_NULL },
48 };
49
50 const ArgparseOption program_positional[] = {
51 { OPTION_STRING, nullptr, "inpath", offsetof(Arguments, inpath), nullptr, "input path" },
52 { OPTION_STRING, nullptr, "outpath", offsetof(Arguments, outpath), nullptr, "output path" },
53 { OPTION_UINT, "w", "width", offsetof(Arguments, out_w), nullptr, "width" },
54 { OPTION_UINT, "h", "height", offsetof(Arguments, out_h), nullptr, "height" },
55 { OPTION_NULL },
56 };
57
58 const ArgparseCommandLine program_def = { program_switches, program_positional, "tile_example", "resize BMP images with tile-based threading" };
59
60
61 struct TileTask {
62 zimgxx::zimage_format src_format;
63 zimgxx::zimage_format dst_format;
64 unsigned tile_left;
65 unsigned tile_top;
66 };
67
68 struct Callback {
69 const WindowsBitmap *in_bmp;
70 WindowsBitmap *out_bmp;
71 const TileTask *task;
72 const zimgxx::zimage_buffer *src_buf;
73 const zimgxx::zimage_buffer *dst_buf;
74 };
75
allocate_buffer(const zimgxx::zimage_format & format,unsigned count)76 std::pair<zimgxx::zimage_buffer, std::shared_ptr<void>> allocate_buffer(const zimgxx::zimage_format &format, unsigned count)
77 {
78 zimgxx::zimage_buffer buffer;
79 std::shared_ptr<void> handle;
80 unsigned char *ptr;
81
82 unsigned mask = zimg_select_buffer_mask(count);
83 size_t channel_size[3] = { 0 };
84 size_t pixel_size;
85
86 count = (mask == ZIMG_BUFFER_MAX) ? format.height : mask + 1;
87
88 if (format.pixel_type == ZIMG_PIXEL_FLOAT)
89 pixel_size = sizeof(float);
90 else if (format.pixel_type == ZIMG_PIXEL_WORD || format.pixel_type == ZIMG_PIXEL_HALF)
91 pixel_size = sizeof(uint16_t);
92 else
93 pixel_size = sizeof(uint8_t);
94
95 for (unsigned p = 0; p < (format.color_family == ZIMG_COLOR_GREY ? 1U : 3U); ++p) {
96 unsigned count_plane = p ? count >> format.subsample_h : count;
97 unsigned mask_plane = (mask == ZIMG_BUFFER_MAX) ? mask : mask >> format.subsample_h;
98 size_t row_size = format.width * pixel_size;
99 ptrdiff_t stride = (row_size + 31) & ~31;
100
101 buffer.mask(p) = mask_plane;
102 buffer.stride(p) = stride;
103 channel_size[p] = static_cast<size_t>(stride) * count_plane;
104 }
105
106 handle.reset(aligned_malloc(channel_size[0] + channel_size[1] + channel_size[2], 32), &aligned_free);
107 ptr = static_cast<unsigned char *>(handle.get());
108
109 for (unsigned p = 0; p < (format.color_family == ZIMG_COLOR_GREY ? 1U : 3U); ++p) {
110 buffer.data(p) = ptr;
111 ptr += channel_size[p];
112 }
113
114 return{ buffer, handle };
115 }
116
allocate_buffer(size_t size)117 std::shared_ptr<void> allocate_buffer(size_t size)
118 {
119 return{ aligned_malloc(size, 32), &aligned_free };
120 }
121
unpack_bmp(void * user,unsigned i,unsigned left,unsigned right)122 int unpack_bmp(void *user, unsigned i, unsigned left, unsigned right)
123 {
124 Callback *cb = static_cast<Callback *>(user);
125
126 // Pixel indices in the input image are relative to the whole image.
127 const uint8_t *packed_bgr = cb->in_bmp->read_ptr() + static_cast<ptrdiff_t>(i) * cb->in_bmp->stride();
128 uint8_t *planar_r = static_cast<uint8_t *>(cb->src_buf->line_at(i, 0));
129 uint8_t *planar_g = static_cast<uint8_t *>(cb->src_buf->line_at(i, 1));
130 uint8_t *planar_b = static_cast<uint8_t *>(cb->src_buf->line_at(i, 2));
131 unsigned step = cb->in_bmp->bit_count() / 8;
132
133 for (unsigned j = left; j < right; ++j) {
134 uint8_t r, g, b;
135
136 b = packed_bgr[j * step + 0];
137 g = packed_bgr[j * step + 1];
138 r = packed_bgr[j * step + 2];
139
140 planar_r[j] = r;
141 planar_g[j] = g;
142 planar_b[j] = b;
143 }
144
145 return 0;
146 }
147
pack_bmp(void * user,unsigned i,unsigned left,unsigned right)148 int pack_bmp(void *user, unsigned i, unsigned left, unsigned right)
149 {
150 Callback *cb = static_cast<Callback *>(user);
151
152 // Pixel indices in the output image are relative to the tile.
153 const uint8_t *planar_r = static_cast<const uint8_t *>(cb->dst_buf->line_at(i, 0));
154 const uint8_t *planar_g = static_cast<const uint8_t *>(cb->dst_buf->line_at(i, 1));
155 const uint8_t *planar_b = static_cast<const uint8_t *>(cb->dst_buf->line_at(i, 2));
156 uint8_t *packed_bgr = cb->out_bmp->write_ptr() + static_cast<ptrdiff_t>(i + cb->task->tile_top) * cb->out_bmp->stride() + (cb->task->tile_left * cb->out_bmp->bit_count() / 8);
157 unsigned step = cb->out_bmp->bit_count() / 8;
158
159 for (unsigned j = left; j < right; ++j) {
160 uint8_t r, g, b;
161
162 r = planar_r[j];
163 g = planar_g[j];
164 b = planar_b[j];
165
166 packed_bgr[j * step + 0] = b;
167 packed_bgr[j * step + 1] = g;
168 packed_bgr[j * step + 2] = r;
169 }
170
171 return 0;
172 }
173
174 void thread_func(const WindowsBitmap *in_bmp, WindowsBitmap *out_bmp, std::vector<TileTask> *tasks, std::mutex *mutex, std::exception_ptr *eptr, bool interactive);
175
execute(const Arguments & args)176 void execute(const Arguments &args)
177 {
178 WindowsBitmap in_bmp{ args.inpath, WindowsBitmap::READ_TAG };
179 WindowsBitmap out_bmp{ args.outpath, static_cast<int>(args.out_w), static_cast<int>(args.out_h), 24 };
180
181 // (1) Fill the common fields in the format descriptors for the input and output files.
182 zimgxx::zimage_format in_format;
183 zimgxx::zimage_format out_format;
184
185 in_format.width = in_bmp.width();
186 in_format.height = in_bmp.height();
187 in_format.pixel_type = ZIMG_PIXEL_BYTE;
188 in_format.color_family = ZIMG_COLOR_RGB;
189 in_format.pixel_range = ZIMG_RANGE_FULL;
190
191 out_format.pixel_type = ZIMG_PIXEL_BYTE;
192 out_format.color_family = ZIMG_COLOR_RGB;
193 out_format.pixel_range = ZIMG_RANGE_FULL;
194
195 std::vector<TileTask> task_queue;
196
197 // (2) Calculate the bounds of the input regions from the output regions. For
198 // each tile, a graph creates an image of tile_width x tile_height from a
199 // subset of the input image. Note that the input tile is specified through
200 // the active_region field, unlike the output tile.
201 double scale_w = static_cast<double>(in_bmp.width()) / args.out_w;
202 double scale_h = static_cast<double>(in_bmp.height()) / args.out_h;
203
204 // The destination buffer passed to zimg_filter_graph_process will point to
205 // the upper-left corner of the tile. As a result, when not using a pack
206 // callback, incrementing the output image by tile_width pixels must
207 // maintain alignment.
208 if (args.tile_width % 32)
209 std::cout << "warning: tile width results in unaligned image\n";
210
211 for (unsigned i = 0; i < args.out_h; i += args.tile_height) {
212 for (unsigned j = 0; j < args.out_w; j += args.tile_width) {
213 zimgxx::zimage_format tile_in_format = in_format;
214 zimgxx::zimage_format tile_out_format = out_format;
215
216 unsigned tile_right = out_bmp.width() - j >= args.tile_width ? j + args.tile_width : out_bmp.width();
217 unsigned tile_bottom = out_bmp.height() - i >= args.tile_height ? i + args.tile_height : out_bmp.height();
218
219 tile_in_format.active_region.left = j * scale_w;
220 tile_in_format.active_region.top = i * scale_h;
221 tile_in_format.active_region.width = (tile_right - j) * scale_w;
222 tile_in_format.active_region.height = (tile_bottom - i) * scale_h;
223
224 tile_out_format.width = tile_right - j;
225 tile_out_format.height = tile_bottom - i;
226
227 task_queue.push_back({ tile_in_format, tile_out_format, j, i });
228 }
229 }
230
231 // (3) Distribute the tiles across threads. Note that the calls to
232 // zimg_filter_graph_create must also be parallelized for maximum effect.
233 std::vector<std::thread> threads;
234 unsigned num_threads = args.interactive ? 1 : (args.threads ? args.threads : std::thread::hardware_concurrency());
235 std::exception_ptr eptr;
236 std::mutex mutex;
237
238 // Process tiles in raster order.
239 std::reverse(task_queue.begin(), task_queue.end());
240
241 threads.reserve(num_threads);
242 for (unsigned i = 0; i < num_threads; ++i) {
243 threads.emplace_back(thread_func, &in_bmp, &out_bmp, &task_queue, &mutex, &eptr, !!args.interactive);
244 }
245
246 for (std::thread &th : threads) {
247 th.join();
248 }
249
250 if (eptr)
251 std::rethrow_exception(eptr);
252 }
253
thread_func(const WindowsBitmap * in_bmp,WindowsBitmap * out_bmp,std::vector<TileTask> * tasks,std::mutex * mutex,std::exception_ptr * eptr,bool interactive)254 void thread_func(const WindowsBitmap *in_bmp, WindowsBitmap *out_bmp, std::vector<TileTask> *tasks, std::mutex *mutex, std::exception_ptr *eptr, bool interactive)
255 {
256 try {
257 while (true) {
258 std::unique_lock<std::mutex> lock{ *mutex };
259 if (tasks->empty())
260 break;
261
262 TileTask task = tasks->back();
263 tasks->pop_back();
264 lock.unlock();
265
266 // (4) Build the processing context for the tile.
267 zimgxx::FilterGraph graph{ zimgxx::FilterGraph::build(task.src_format, task.dst_format) };
268 unsigned input_buffering = graph.get_input_buffering();
269 unsigned output_buffering = graph.get_output_buffering();
270 size_t tmp_size = graph.get_tmp_size();
271
272 if (input_buffering == ZIMG_BUFFER_MAX || output_buffering == ZIMG_BUFFER_MAX)
273 throw std::logic_error{ "graph can not be processed with tiles" };
274
275 // (5) Allocate scanline buffers for the input and output data.
276 auto src_buf = allocate_buffer(task.src_format, input_buffering);
277 auto dst_buf = allocate_buffer(task.dst_format, output_buffering);
278 auto tmp = allocate_buffer(tmp_size);
279
280 // (6) Process the tile.
281 Callback cb{ in_bmp, out_bmp, &task, &src_buf.first, &dst_buf.first };
282 graph.process(src_buf.first.as_const(), dst_buf.first, tmp.get(), unpack_bmp, &cb, pack_bmp, &cb);
283
284 if (interactive) {
285 out_bmp->flush();
286 std::cout << "Press enter to continue...";
287 std::cin.get();
288 }
289 }
290 } catch (...) {
291 std::lock_guard<std::mutex> lock{ *mutex };
292 *eptr = std::current_exception();
293 }
294 }
295
296 } // namespace
297
298
main(int argc,char ** argv)299 int main(int argc, char **argv)
300 {
301 Arguments args{};
302 int ret;
303
304 args.tile_width = 512;
305 args.tile_height = 512;
306
307 if ((ret = argparse_parse(&program_def, &args, argc, argv)) < 0)
308 return ret == ARGPARSE_HELP_MESSAGE ? 0 : ret;
309
310 if (zimg_get_api_version(nullptr, nullptr) < ZIMG_MAKE_API_VERSION(2, 1)) {
311 std::cerr << "error: subpixel operation requires API 2.1\n";
312 return 2;
313 }
314
315 // Prior to z.lib 2.9, using horizontal tiling results in redundant loading
316 // of scanlines above the first row in the tile.
317 if (args.tile_height < args.out_h) {
318 unsigned version[3];
319 zimg_get_version_info(version, version + 1, version + 2);
320
321 if (version[0] < 2 || (version[0] == 2 && version[1] < 9))
322 std::cerr << "warning: horizontal tiling may be slow in z.lib versions prior to 2.9\n";
323 }
324
325 try {
326 execute(args);
327 } catch (const std::system_error &e) {
328 std::cerr << "system_error " << e.code() << ": " << e.what() << '\n';
329 return 2;
330 } catch (const zimgxx::zerror &e) {
331 std::cerr << "zimg error " << e.code << ": " << e.msg << '\n';
332 return 2;
333 } catch (const std::runtime_error &e) {
334 std::cerr << "runtime_error: " << e.what() << '\n';
335 return 2;
336 } catch (const std::logic_error &e) {
337 std::cerr << "logic_error: " << e.what() << '\n';
338 return 2;
339 }
340
341 return 0;
342 }
343