xref: /qemu/migration/xbzrle.c (revision 78f314cf)
1 /*
2  * Xor Based Zero Run Length Encoding
3  *
4  * Copyright 2013 Red Hat, Inc. and/or its affiliates
5  *
6  * Authors:
7  *  Orit Wasserman  <owasserm@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  *
12  */
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
15 #include "qemu/host-utils.h"
16 #include "xbzrle.h"
17 
18 #if defined(CONFIG_AVX512BW_OPT)
19 #include <immintrin.h>
20 #include "host/cpuinfo.h"
21 
22 static int __attribute__((target("avx512bw")))
23 xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
24                             uint8_t *dst, int dlen)
25 {
26     uint32_t zrun_len = 0, nzrun_len = 0;
27     int d = 0, i = 0, num = 0;
28     uint8_t *nzrun_start = NULL;
29     /* add 1 to include residual part in main loop */
30     uint32_t count512s = (slen >> 6) + 1;
31     /* countResidual is tail of data, i.e., countResidual = slen % 64 */
32     uint32_t count_residual = slen & 0b111111;
33     bool never_same = true;
34     uint64_t mask_residual = 1;
35     mask_residual <<= count_residual;
36     mask_residual -= 1;
37     __m512i r = _mm512_set1_epi32(0);
38 
39     while (count512s) {
40         int bytes_to_check = 64;
41         uint64_t mask = 0xffffffffffffffff;
42         if (count512s == 1) {
43             bytes_to_check = count_residual;
44             mask = mask_residual;
45         }
46         __m512i old_data = _mm512_mask_loadu_epi8(r,
47                                                   mask, old_buf + i);
48         __m512i new_data = _mm512_mask_loadu_epi8(r,
49                                                   mask, new_buf + i);
50         uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
51         count512s--;
52 
53         bool is_same = (comp & 0x1);
54         while (bytes_to_check) {
55             if (d + 2 > dlen) {
56                 return -1;
57             }
58             if (is_same) {
59                 if (nzrun_len) {
60                     d += uleb128_encode_small(dst + d, nzrun_len);
61                     if (d + nzrun_len > dlen) {
62                         return -1;
63                     }
64                     nzrun_start = new_buf + i - nzrun_len;
65                     memcpy(dst + d, nzrun_start, nzrun_len);
66                     d += nzrun_len;
67                     nzrun_len = 0;
68                 }
69                 /* 64 data at a time for speed */
70                 if (count512s && (comp == 0xffffffffffffffff)) {
71                     i += 64;
72                     zrun_len += 64;
73                     break;
74                 }
75                 never_same = false;
76                 num = ctz64(~comp);
77                 num = (num < bytes_to_check) ? num : bytes_to_check;
78                 zrun_len += num;
79                 bytes_to_check -= num;
80                 comp >>= num;
81                 i += num;
82                 if (bytes_to_check) {
83                     /* still has different data after same data */
84                     d += uleb128_encode_small(dst + d, zrun_len);
85                     zrun_len = 0;
86                 } else {
87                     break;
88                 }
89             }
90             if (never_same || zrun_len) {
91                 /*
92                  * never_same only acts if
93                  * data begins with diff in first count512s
94                  */
95                 d += uleb128_encode_small(dst + d, zrun_len);
96                 zrun_len = 0;
97                 never_same = false;
98             }
99             /* has diff, 64 data at a time for speed */
100             if ((bytes_to_check == 64) && (comp == 0x0)) {
101                 i += 64;
102                 nzrun_len += 64;
103                 break;
104             }
105             num = ctz64(comp);
106             num = (num < bytes_to_check) ? num : bytes_to_check;
107             nzrun_len += num;
108             bytes_to_check -= num;
109             comp >>= num;
110             i += num;
111             if (bytes_to_check) {
112                 /* mask like 111000 */
113                 d += uleb128_encode_small(dst + d, nzrun_len);
114                 /* overflow */
115                 if (d + nzrun_len > dlen) {
116                     return -1;
117                 }
118                 nzrun_start = new_buf + i - nzrun_len;
119                 memcpy(dst + d, nzrun_start, nzrun_len);
120                 d += nzrun_len;
121                 nzrun_len = 0;
122                 is_same = true;
123             }
124         }
125     }
126 
127     if (nzrun_len != 0) {
128         d += uleb128_encode_small(dst + d, nzrun_len);
129         /* overflow */
130         if (d + nzrun_len > dlen) {
131             return -1;
132         }
133         nzrun_start = new_buf + i - nzrun_len;
134         memcpy(dst + d, nzrun_start, nzrun_len);
135         d += nzrun_len;
136     }
137     return d;
138 }
139 
140 static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
141                                     int slen, uint8_t *dst, int dlen);
142 
143 static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
144 
145 static void __attribute__((constructor)) init_accel(void)
146 {
147     unsigned info = cpuinfo_init();
148     if (info & CPUINFO_AVX512BW) {
149         accel_func = xbzrle_encode_buffer_avx512;
150     } else {
151         accel_func = xbzrle_encode_buffer_int;
152     }
153 }
154 
155 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
156                          uint8_t *dst, int dlen)
157 {
158     return accel_func(old_buf, new_buf, slen, dst, dlen);
159 }
160 
161 #define xbzrle_encode_buffer xbzrle_encode_buffer_int
162 #endif
163 
164 /*
165   page = zrun nzrun
166        | zrun nzrun page
167 
168   zrun = length
169 
170   nzrun = length byte...
171 
172   length = uleb128 encoded integer
173  */
174 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
175                          uint8_t *dst, int dlen)
176 {
177     uint32_t zrun_len = 0, nzrun_len = 0;
178     int d = 0, i = 0;
179     long res;
180     uint8_t *nzrun_start = NULL;
181 
182     g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
183                sizeof(long)));
184 
185     while (i < slen) {
186         /* overflow */
187         if (d + 2 > dlen) {
188             return -1;
189         }
190 
191         /* not aligned to sizeof(long) */
192         res = (slen - i) % sizeof(long);
193         while (res && old_buf[i] == new_buf[i]) {
194             zrun_len++;
195             i++;
196             res--;
197         }
198 
199         /* word at a time for speed */
200         if (!res) {
201             while (i < slen &&
202                    (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
203                 i += sizeof(long);
204                 zrun_len += sizeof(long);
205             }
206 
207             /* go over the rest */
208             while (i < slen && old_buf[i] == new_buf[i]) {
209                 zrun_len++;
210                 i++;
211             }
212         }
213 
214         /* buffer unchanged */
215         if (zrun_len == slen) {
216             return 0;
217         }
218 
219         /* skip last zero run */
220         if (i == slen) {
221             return d;
222         }
223 
224         d += uleb128_encode_small(dst + d, zrun_len);
225 
226         zrun_len = 0;
227         nzrun_start = new_buf + i;
228 
229         /* overflow */
230         if (d + 2 > dlen) {
231             return -1;
232         }
233         /* not aligned to sizeof(long) */
234         res = (slen - i) % sizeof(long);
235         while (res && old_buf[i] != new_buf[i]) {
236             i++;
237             nzrun_len++;
238             res--;
239         }
240 
241         /* word at a time for speed, use of 32-bit long okay */
242         if (!res) {
243             /* truncation to 32-bit long okay */
244             unsigned long mask = (unsigned long)0x0101010101010101ULL;
245             while (i < slen) {
246                 unsigned long xor;
247                 xor = *(unsigned long *)(old_buf + i)
248                     ^ *(unsigned long *)(new_buf + i);
249                 if ((xor - mask) & ~xor & (mask << 7)) {
250                     /* found the end of an nzrun within the current long */
251                     while (old_buf[i] != new_buf[i]) {
252                         nzrun_len++;
253                         i++;
254                     }
255                     break;
256                 } else {
257                     i += sizeof(long);
258                     nzrun_len += sizeof(long);
259                 }
260             }
261         }
262 
263         d += uleb128_encode_small(dst + d, nzrun_len);
264         /* overflow */
265         if (d + nzrun_len > dlen) {
266             return -1;
267         }
268         memcpy(dst + d, nzrun_start, nzrun_len);
269         d += nzrun_len;
270         nzrun_len = 0;
271     }
272 
273     return d;
274 }
275 
276 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
277 {
278     int i = 0, d = 0;
279     int ret;
280     uint32_t count = 0;
281 
282     while (i < slen) {
283 
284         /* zrun */
285         if ((slen - i) < 2) {
286             return -1;
287         }
288 
289         ret = uleb128_decode_small(src + i, &count);
290         if (ret < 0 || (i && !count)) {
291             return -1;
292         }
293         i += ret;
294         d += count;
295 
296         /* overflow */
297         if (d > dlen) {
298             return -1;
299         }
300 
301         /* nzrun */
302         if ((slen - i) < 2) {
303             return -1;
304         }
305 
306         ret = uleb128_decode_small(src + i, &count);
307         if (ret < 0 || !count) {
308             return -1;
309         }
310         i += ret;
311 
312         /* overflow */
313         if (d + count > dlen || i + count > slen) {
314             return -1;
315         }
316 
317         memcpy(dst + d, src + i, count);
318         d += count;
319         i += count;
320     }
321 
322     return d;
323 }
324