1 /* GStreamer
2  * Copyright (C) <2014> Wim Taymans <wim.taymans@gmail.com>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #  include "config.h"
22 #endif
23 
24 #include <string.h>
25 #include <stdio.h>
26 #include <math.h>
27 
28 /**
29  * SECTION:gstvideoscaler
30  * @title: GstVideoScaler
31  * @short_description: Utility object for rescaling video frames
32  *
33  * #GstVideoScaler is a utility object for rescaling and resampling
34  * video frames using various interpolation / sampling methods.
35  *
36  */
37 
38 #ifndef DISABLE_ORC
39 #include <orc/orcfunctions.h>
40 #else
41 #define orc_memcpy memcpy
42 #endif
43 
44 #include "video-orc.h"
45 #include "video-scaler.h"
46 
47 #ifndef GST_DISABLE_GST_DEBUG
48 #define GST_CAT_DEFAULT ensure_debug_category()
49 static GstDebugCategory *
ensure_debug_category(void)50 ensure_debug_category (void)
51 {
52   static gsize cat_gonce = 0;
53 
54   if (g_once_init_enter (&cat_gonce)) {
55     gsize cat_done;
56 
57     cat_done = (gsize) _gst_debug_category_new ("video-scaler", 0,
58         "video-scaler object");
59 
60     g_once_init_leave (&cat_gonce, cat_done);
61   }
62 
63   return (GstDebugCategory *) cat_gonce;
64 }
65 
66 #else
67 #define ensure_debug_category() /* NOOP */
68 #endif /* GST_DISABLE_GST_DEBUG */
69 
/* Number of fractional bits of the 16-bit integer taps handed to
 * make_s16_taps() for the 8-bit kernels, the low-quality 8-bit kernels and
 * the 16-bit kernels. Each *_ROUND value is half of one unit at that
 * precision. */
#define SCALE_U8          12
#define SCALE_U8_ROUND    ((1 << SCALE_U8) >> 1)
#define SCALE_U8_LQ       6
#define SCALE_U8_LQ_ROUND ((1 << SCALE_U8_LQ) >> 1)
#define SCALE_U16         12
#define SCALE_U16_ROUND   ((1 << SCALE_U16) >> 1)

/* When defined, the 8-bit kernels below use the "_lq" ORC code paths. */
#define LQ
78 
/* Signature shared by the horizontal kernels (video_scale_h_*): read one
 * source line @src and write @width output elements, each made of @n_elems
 * values, into @dest starting at element @dest_offset. */
typedef void (*GstVideoScalerHFunc) (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems);
/* Signature shared by the vertical kernels (video_scale_v_*): combine the
 * source lines in @srcs into the single output line @dest; @dest_offset
 * selects which output line's filter taps are used. */
typedef void (*GstVideoScalerVFunc) (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
    guint n_elems);
84 
85 struct _GstVideoScaler
86 {
87   GstVideoResamplerMethod method;
88   GstVideoScalerFlags flags;
89 
90   GstVideoResampler resampler;
91 
92   gboolean merged;
93   gint in_y_offset;
94   gint out_y_offset;
95 
96   /* cached integer coefficients */
97   gint16 *taps_s16;
98   gint16 *taps_s16_4;
99   guint32 *offset_n;
100   /* for ORC */
101   gint inc;
102 
103   gint tmpwidth;
104   gpointer tmpline1;
105   gpointer tmpline2;
106 };
107 
108 static void
resampler_zip(GstVideoResampler * resampler,const GstVideoResampler * r1,const GstVideoResampler * r2)109 resampler_zip (GstVideoResampler * resampler, const GstVideoResampler * r1,
110     const GstVideoResampler * r2)
111 {
112   guint i, out_size, max_taps, n_phases;
113   gdouble *taps;
114   guint32 *offset, *phase;
115 
116   g_return_if_fail (r1->max_taps == r2->max_taps);
117 
118   out_size = r1->out_size + r2->out_size;
119   max_taps = r1->max_taps;
120   n_phases = out_size;
121   offset = g_malloc (sizeof (guint32) * out_size);
122   phase = g_malloc (sizeof (guint32) * n_phases);
123   taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);
124 
125   resampler->in_size = r1->in_size + r2->in_size;
126   resampler->out_size = out_size;
127   resampler->max_taps = max_taps;
128   resampler->n_phases = n_phases;
129   resampler->offset = offset;
130   resampler->phase = phase;
131   resampler->n_taps = g_malloc (sizeof (guint32) * out_size);
132   resampler->taps = taps;
133 
134   for (i = 0; i < out_size; i++) {
135     guint idx = i / 2;
136     const GstVideoResampler *r;
137 
138     r = (i & 1) ? r2 : r1;
139 
140     offset[i] = r->offset[idx] * 2 + (i & 1);
141     phase[i] = i;
142 
143     memcpy (taps + i * max_taps, r->taps + r->phase[idx] * max_taps,
144         max_taps * sizeof (gdouble));
145   }
146 }
147 
148 static void
realloc_tmplines(GstVideoScaler * scale,gint n_elems,gint width)149 realloc_tmplines (GstVideoScaler * scale, gint n_elems, gint width)
150 {
151   scale->tmpline1 =
152       g_realloc (scale->tmpline1,
153       sizeof (gint32) * width * n_elems * scale->resampler.max_taps);
154   scale->tmpline2 =
155       g_realloc (scale->tmpline2, sizeof (gint32) * width * n_elems);
156   scale->tmpwidth = width;
157 }
158 
159 static void
scaler_dump(GstVideoScaler * scale)160 scaler_dump (GstVideoScaler * scale)
161 {
162 #if 0
163   gint i, j, in_size, out_size, max_taps;
164   guint32 *offset, *phase;
165   gdouble *taps;
166   GstVideoResampler *r = &scale->resampler;
167 
168   in_size = r->in_size;
169   out_size = r->out_size;
170   offset = r->offset;
171   phase = r->phase;
172   max_taps = r->max_taps;
173   taps = r->taps;
174 
175   g_print ("in %d, out %d, max_taps %d, n_phases %d\n", in_size, out_size,
176       max_taps, r->n_phases);
177 
178   for (i = 0; i < out_size; i++) {
179     g_print ("%d: \t%d \t%d:", i, offset[i], phase[i]);
180 
181     for (j = 0; j < max_taps; j++) {
182       g_print ("\t%f", taps[i * max_taps + j]);
183     }
184     g_print ("\n");
185   }
186 #endif
187 }
188 
189 #define INTERLACE_SHIFT 0.5
190 
191 /**
192  * gst_video_scaler_new: (skip)
193  * @method: a #GstVideoResamplerMethod
194  * @flags: #GstVideoScalerFlags
195  * @n_taps: number of taps to use
196  * @in_size: number of source elements
197  * @out_size: number of destination elements
198  * @options: (allow-none): extra options
199  *
200  * Make a new @method video scaler. @in_size source lines/pixels will
201  * be scaled to @out_size destination lines/pixels.
202  *
203  * @n_taps specifies the amount of pixels to use from the source for one output
204  * pixel. If n_taps is 0, this function chooses a good value automatically based
205  * on the @method and @in_size/@out_size.
206  *
207  * Returns: a #GstVideoScaler
208  */
209 GstVideoScaler *
gst_video_scaler_new(GstVideoResamplerMethod method,GstVideoScalerFlags flags,guint n_taps,guint in_size,guint out_size,GstStructure * options)210 gst_video_scaler_new (GstVideoResamplerMethod method, GstVideoScalerFlags flags,
211     guint n_taps, guint in_size, guint out_size, GstStructure * options)
212 {
213   GstVideoScaler *scale;
214 
215   g_return_val_if_fail (in_size != 0, NULL);
216   g_return_val_if_fail (out_size != 0, NULL);
217 
218   scale = g_slice_new0 (GstVideoScaler);
219 
220   GST_DEBUG ("%d %u  %u->%u", method, n_taps, in_size, out_size);
221 
222   scale->method = method;
223   scale->flags = flags;
224 
225   if (flags & GST_VIDEO_SCALER_FLAG_INTERLACED) {
226     GstVideoResampler tresamp, bresamp;
227     gdouble shift;
228 
229     shift = (INTERLACE_SHIFT * out_size) / in_size;
230 
231     gst_video_resampler_init (&tresamp, method,
232         GST_VIDEO_RESAMPLER_FLAG_HALF_TAPS, (out_size + 1) / 2, n_taps, shift,
233         (in_size + 1) / 2, (out_size + 1) / 2, options);
234 
235     n_taps = tresamp.max_taps;
236 
237     gst_video_resampler_init (&bresamp, method, 0, out_size - tresamp.out_size,
238         n_taps, -shift, in_size - tresamp.in_size,
239         out_size - tresamp.out_size, options);
240 
241     resampler_zip (&scale->resampler, &tresamp, &bresamp);
242     gst_video_resampler_clear (&tresamp);
243     gst_video_resampler_clear (&bresamp);
244   } else {
245     gst_video_resampler_init (&scale->resampler, method,
246         GST_VIDEO_RESAMPLER_FLAG_NONE, out_size, n_taps, 0.0, in_size, out_size,
247         options);
248   }
249 
250   if (out_size == 1)
251     scale->inc = 0;
252   else
253     scale->inc = ((in_size - 1) << 16) / (out_size - 1) - 1;
254 
255   scaler_dump (scale);
256   GST_DEBUG ("max_taps %d", scale->resampler.max_taps);
257 
258   return scale;
259 }
260 
261 /**
262  * gst_video_scaler_free:
263  * @scale: a #GstVideoScaler
264  *
265  * Free a previously allocated #GstVideoScaler @scale.
266  */
267 void
gst_video_scaler_free(GstVideoScaler * scale)268 gst_video_scaler_free (GstVideoScaler * scale)
269 {
270   g_return_if_fail (scale != NULL);
271 
272   gst_video_resampler_clear (&scale->resampler);
273   g_free (scale->taps_s16);
274   g_free (scale->taps_s16_4);
275   g_free (scale->offset_n);
276   g_free (scale->tmpline1);
277   g_free (scale->tmpline2);
278   g_slice_free (GstVideoScaler, scale);
279 }
280 
281 /**
282  * gst_video_scaler_get_max_taps:
283  * @scale: a #GstVideoScaler
284  *
285  * Get the maximum number of taps for @scale.
286  *
287  * Returns: the maximum number of taps
288  */
289 guint
gst_video_scaler_get_max_taps(GstVideoScaler * scale)290 gst_video_scaler_get_max_taps (GstVideoScaler * scale)
291 {
292   g_return_val_if_fail (scale != NULL, 0);
293 
294   return scale->resampler.max_taps;
295 }
296 
297 /**
298  * gst_video_scaler_get_coeff:
299  * @scale: a #GstVideoScaler
300  * @out_offset: an output offset
301  * @in_offset: result input offset
302  * @n_taps: result n_taps
303  *
304  * For a given pixel at @out_offset, get the first required input pixel at
305  * @in_offset and the @n_taps filter coefficients.
306  *
307  * Note that for interlaced content, @in_offset needs to be incremented with
308  * 2 to get the next input line.
309  *
310  * Returns: an array of @n_tap gdouble values with filter coefficients.
311  */
312 const gdouble *
gst_video_scaler_get_coeff(GstVideoScaler * scale,guint out_offset,guint * in_offset,guint * n_taps)313 gst_video_scaler_get_coeff (GstVideoScaler * scale,
314     guint out_offset, guint * in_offset, guint * n_taps)
315 {
316   guint offset, phase;
317 
318   g_return_val_if_fail (scale != NULL, NULL);
319   g_return_val_if_fail (out_offset < scale->resampler.out_size, NULL);
320 
321   offset = scale->resampler.offset[out_offset];
322   phase = scale->resampler.phase[out_offset];
323 
324   if (in_offset)
325     *in_offset = offset;
326   if (n_taps) {
327     *n_taps = scale->resampler.max_taps;
328     if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
329       *n_taps *= 2;
330   }
331   return scale->resampler.taps + phase * scale->resampler.max_taps;
332 }
333 
334 static gboolean
resampler_convert_coeff(const gdouble * src,gpointer dest,guint n,guint bits,guint precision)335 resampler_convert_coeff (const gdouble * src,
336     gpointer dest, guint n, guint bits, guint precision)
337 {
338   gdouble multiplier;
339   gint i, j;
340   gdouble offset, l_offset, h_offset;
341   gboolean exact = FALSE;
342 
343   multiplier = (1 << precision);
344 
345   /* Round to integer, but with an adjustable bias that we use to
346    * eliminate the DC error. */
347   l_offset = 0.0;
348   h_offset = 1.0;
349   offset = 0.5;
350 
351   for (i = 0; i < 64; i++) {
352     gint sum = 0;
353 
354     for (j = 0; j < n; j++) {
355       gint16 tap = floor (offset + src[j] * multiplier);
356 
357       ((gint16 *) dest)[j] = tap;
358 
359       sum += tap;
360     }
361     if (sum == (1 << precision)) {
362       exact = TRUE;
363       break;
364     }
365 
366     if (l_offset == h_offset)
367       break;
368 
369     if (sum < (1 << precision)) {
370       if (offset > l_offset)
371         l_offset = offset;
372       offset += (h_offset - l_offset) / 2;
373     } else {
374       if (offset < h_offset)
375         h_offset = offset;
376       offset -= (h_offset - l_offset) / 2;
377     }
378   }
379 
380   if (!exact)
381     GST_WARNING ("can't find exact taps");
382 
383   return exact;
384 }
385 
386 static void
make_s16_taps(GstVideoScaler * scale,gint n_elems,gint precision)387 make_s16_taps (GstVideoScaler * scale, gint n_elems, gint precision)
388 {
389   gint i, j, max_taps, n_phases, out_size, src_inc;
390   gint16 *taps_s16, *taps_s16_4;
391   gdouble *taps;
392   guint32 *phase, *offset, *offset_n;
393 
394   n_phases = scale->resampler.n_phases;
395   max_taps = scale->resampler.max_taps;
396 
397   taps = scale->resampler.taps;
398   taps_s16 = scale->taps_s16 = g_malloc (sizeof (gint16) * n_phases * max_taps);
399 
400   for (i = 0; i < n_phases; i++) {
401     resampler_convert_coeff (taps, taps_s16, max_taps, 16, precision);
402 
403     taps += max_taps;
404     taps_s16 += max_taps;
405   }
406 
407   out_size = scale->resampler.out_size;
408 
409   taps_s16 = scale->taps_s16;
410   phase = scale->resampler.phase;
411   offset = scale->resampler.offset;
412 
413   taps_s16_4 = scale->taps_s16_4 =
414       g_malloc (sizeof (gint16) * out_size * max_taps * 4);
415   offset_n = scale->offset_n =
416       g_malloc (sizeof (guint32) * out_size * max_taps);
417 
418   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
419     src_inc = 2;
420   else
421     src_inc = 1;
422 
423   for (j = 0; j < max_taps; j++) {
424     for (i = 0; i < out_size; i++) {
425       gint16 tap;
426 
427       if (scale->merged) {
428         if ((i & 1) == scale->out_y_offset)
429           offset_n[j * out_size + i] = offset[i] + (2 * j);
430         else
431           offset_n[j * out_size + i] = offset[i] + (4 * j);
432       } else {
433         offset_n[j * out_size + i] = offset[i] + j * src_inc;
434       }
435       tap = taps_s16[phase[i] * max_taps + j];
436       taps_s16_4[(j * out_size + i) * n_elems + 0] = tap;
437       if (n_elems > 1)
438         taps_s16_4[(j * out_size + i) * n_elems + 1] = tap;
439       if (n_elems > 2)
440         taps_s16_4[(j * out_size + i) * n_elems + 2] = tap;
441       if (n_elems > 3)
442         taps_s16_4[(j * out_size + i) * n_elems + 3] = tap;
443     }
444   }
445 }
446 
447 #undef ACC_SCALE
448 
449 static void
video_scale_h_near_u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)450 video_scale_h_near_u8 (GstVideoScaler * scale,
451     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
452 {
453   guint8 *s, *d;
454   gint i;
455 
456   d = (guint8 *) dest + dest_offset;
457   s = (guint8 *) src;
458 
459   {
460 #ifndef ACC_SCALE
461     guint32 *offset = scale->resampler.offset + dest_offset;
462 
463     for (i = 0; i < width; i++)
464       d[i] = s[offset[i]];
465 #else
466     gint acc = 0;
467 
468     for (i = 0; i < width; i++) {
469       gint j = (acc + 0x8000) >> 16;
470       d[i] = s[j];
471       acc += scale->inc;
472     }
473 #endif
474   }
475 }
476 
477 static void
video_scale_h_near_3u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)478 video_scale_h_near_3u8 (GstVideoScaler * scale,
479     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
480 {
481   guint8 *s, *d;
482   gint i;
483 
484   d = (guint8 *) dest + dest_offset;
485   s = (guint8 *) src;
486 
487   {
488 #ifndef ACC_SCALE
489     guint32 *offset = scale->resampler.offset + dest_offset;
490 
491     for (i = 0; i < width; i++) {
492       gint j = offset[i] * 3;
493 
494       d[i * 3 + 0] = s[j + 0];
495       d[i * 3 + 1] = s[j + 1];
496       d[i * 3 + 2] = s[j + 2];
497     }
498 #else
499     gint acc = 0;
500 
501     for (i = 0; i < width; i++) {
502       gint j = ((acc + 0x8000) >> 16) * 3;
503 
504       d[i * 3 + 0] = s[j + 0];
505       d[i * 3 + 1] = s[j + 1];
506       d[i * 3 + 2] = s[j + 2];
507       acc += scale->inc;
508     }
509 #endif
510   }
511 }
512 
513 static void
video_scale_h_near_u16(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)514 video_scale_h_near_u16 (GstVideoScaler * scale,
515     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
516 {
517   guint16 *s, *d;
518   gint i;
519 
520   d = (guint16 *) dest + dest_offset;
521   s = (guint16 *) src;
522 
523   {
524 #ifndef ACC_SCALE
525     guint32 *offset = scale->resampler.offset + dest_offset;
526 
527     for (i = 0; i < width; i++)
528       d[i] = s[offset[i]];
529 #else
530     gint acc = 0;
531 
532     for (i = 0; i < width; i++) {
533       gint j = (acc + 0x8000) >> 16;
534       d[i] = s[j];
535       acc += scale->inc;
536     }
537 #endif
538   }
539 }
540 
541 static void
video_scale_h_near_u32(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)542 video_scale_h_near_u32 (GstVideoScaler * scale,
543     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
544 {
545   guint32 *s, *d;
546 
547   d = (guint32 *) dest + dest_offset;
548   s = (guint32 *) src;
549 
550 #if 0
551   /* ORC is slower on this */
552   video_orc_resample_h_near_u32_lq (d, s, 0, scale->inc, width);
553 #elif 0
554   video_orc_resample_h_near_u32 (d, s, offset, width);
555 #else
556   {
557     gint i;
558 #ifndef ACC_SCALE
559     guint32 *offset = scale->resampler.offset + dest_offset;
560 
561     for (i = 0; i < width; i++)
562       d[i] = s[offset[i]];
563 #else
564     gint acc = 0;
565 
566     for (i = 0; i < width; i++) {
567       gint j = (acc + 0x8000) >> 16;
568       d[i] = s[j];
569       acc += scale->inc;
570     }
571 #endif
572   }
573 #endif
574 }
575 
576 static void
video_scale_h_near_u64(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)577 video_scale_h_near_u64 (GstVideoScaler * scale,
578     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
579 {
580   guint64 *s, *d;
581   gint i;
582   guint32 *offset;
583 
584   d = (guint64 *) dest + dest_offset;
585   s = (guint64 *) src;
586 
587   offset = scale->resampler.offset + dest_offset;
588   for (i = 0; i < width; i++)
589     d[i] = s[offset[i]];
590 }
591 
592 static void
video_scale_h_2tap_1u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)593 video_scale_h_2tap_1u8 (GstVideoScaler * scale,
594     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
595 {
596   guint8 *s, *d;
597 
598   d = (guint8 *) dest + dest_offset;
599   s = (guint8 *) src;
600 
601   video_orc_resample_h_2tap_1u8_lq (d, s, 0, scale->inc, width);
602 }
603 
604 static void
video_scale_h_2tap_4u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)605 video_scale_h_2tap_4u8 (GstVideoScaler * scale,
606     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
607 {
608   guint32 *s, *d;
609 
610   d = (guint32 *) dest + dest_offset;
611   s = (guint32 *) src;
612 
613   video_orc_resample_h_2tap_4u8_lq (d, s, 0, scale->inc, width);
614 }
615 
616 static void
video_scale_h_ntap_u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)617 video_scale_h_ntap_u8 (GstVideoScaler * scale,
618     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
619 {
620   gint16 *taps;
621   gint i, max_taps, count;
622   gpointer d;
623   guint32 *offset_n;
624   guint8 *pixels;
625   gint16 *temp;
626 
627   if (scale->taps_s16 == NULL)
628 #ifdef LQ
629     make_s16_taps (scale, n_elems, SCALE_U8_LQ);
630 #else
631     make_s16_taps (scale, n_elems, SCALE_U8);
632 #endif
633 
634   max_taps = scale->resampler.max_taps;
635   offset_n = scale->offset_n;
636 
637   pixels = (guint8 *) scale->tmpline1;
638 
639   /* prepare the arrays */
640   count = width * max_taps;
641   switch (n_elems) {
642     case 1:
643     {
644       guint8 *s = (guint8 *) src;
645 
646       for (i = 0; i < count; i++)
647         pixels[i] = s[offset_n[i]];
648 
649       d = (guint8 *) dest + dest_offset;
650       break;
651     }
652     case 2:
653     {
654       guint16 *p16 = (guint16 *) pixels;
655       guint16 *s = (guint16 *) src;
656 
657       for (i = 0; i < count; i++)
658         p16[i] = s[offset_n[i]];
659 
660       d = (guint16 *) dest + dest_offset;
661       break;
662     }
663     case 3:
664     {
665       guint8 *s = (guint8 *) src;
666 
667       for (i = 0; i < count; i++) {
668         gint j = offset_n[i] * 3;
669         pixels[i * 3 + 0] = s[j + 0];
670         pixels[i * 3 + 1] = s[j + 1];
671         pixels[i * 3 + 2] = s[j + 2];
672       }
673       d = (guint8 *) dest + dest_offset * 3;
674       break;
675     }
676     case 4:
677     {
678       guint32 *p32 = (guint32 *) pixels;
679       guint32 *s = (guint32 *) src;
680 #if 0
681       video_orc_resample_h_near_u32 (p32, s, offset_n, count);
682 #else
683       for (i = 0; i < count; i++)
684         p32[i] = s[offset_n[i]];
685 #endif
686       d = (guint32 *) dest + dest_offset;
687       break;
688     }
689     default:
690       return;
691   }
692   temp = (gint16 *) scale->tmpline2;
693   taps = scale->taps_s16_4;
694   count = width * n_elems;
695 
696 #ifdef LQ
697   if (max_taps == 2) {
698     video_orc_resample_h_2tap_u8_lq (d, pixels, pixels + count, taps,
699         taps + count, count);
700   } else {
701     /* first pixels with first tap to temp */
702     if (max_taps >= 3) {
703       video_orc_resample_h_multaps3_u8_lq (temp, pixels, pixels + count,
704           pixels + count * 2, taps, taps + count, taps + count * 2, count);
705       max_taps -= 3;
706       pixels += count * 3;
707       taps += count * 3;
708     } else {
709       gint first = max_taps % 3;
710 
711       video_orc_resample_h_multaps_u8_lq (temp, pixels, taps, count);
712       video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels + count, count,
713           taps + count, count * 2, count, first - 1);
714       max_taps -= first;
715       pixels += count * first;
716       taps += count * first;
717     }
718     while (max_taps > 3) {
719       if (max_taps >= 6) {
720         video_orc_resample_h_muladdtaps3_u8_lq (temp, pixels, pixels + count,
721             pixels + count * 2, taps, taps + count, taps + count * 2, count);
722         max_taps -= 3;
723         pixels += count * 3;
724         taps += count * 3;
725       } else {
726         video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels, count,
727             taps, count * 2, count, max_taps - 3);
728         pixels += count * (max_taps - 3);
729         taps += count * (max_taps - 3);
730         max_taps = 3;
731       }
732     }
733     if (max_taps == 3) {
734       video_orc_resample_h_muladdscaletaps3_u8_lq (d, pixels, pixels + count,
735           pixels + count * 2, taps, taps + count, taps + count * 2, temp,
736           count);
737     } else {
738       if (max_taps) {
739         /* add other pixels with other taps to t4 */
740         video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels, count,
741             taps, count * 2, count, max_taps);
742       }
743       /* scale and write final result */
744       video_orc_resample_scaletaps_u8_lq (d, temp, count);
745     }
746   }
747 #else
748   /* first pixels with first tap to t4 */
749   video_orc_resample_h_multaps_u8 (temp, pixels, taps, count);
750   /* add other pixels with other taps to t4 */
751   video_orc_resample_h_muladdtaps_u8 (temp, 0, pixels + count, count,
752       taps + count, count * 2, count, max_taps - 1);
753   /* scale and write final result */
754   video_orc_resample_scaletaps_u8 (d, temp, count);
755 #endif
756 }
757 
758 static void
video_scale_h_ntap_u16(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)759 video_scale_h_ntap_u16 (GstVideoScaler * scale,
760     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
761 {
762   gint16 *taps;
763   gint i, max_taps, count;
764   gpointer d;
765   guint32 *offset_n;
766   guint16 *pixels;
767   gint32 *temp;
768 
769   if (scale->taps_s16 == NULL)
770     make_s16_taps (scale, n_elems, SCALE_U16);
771 
772   max_taps = scale->resampler.max_taps;
773   offset_n = scale->offset_n;
774 
775   pixels = (guint16 *) scale->tmpline1;
776   /* prepare the arrays FIXME, we can add this into ORC */
777   count = width * max_taps;
778   switch (n_elems) {
779     case 1:
780     {
781       guint16 *s = (guint16 *) src;
782 
783       for (i = 0; i < count; i++)
784         pixels[i] = s[offset_n[i]];
785 
786       d = (guint16 *) dest + dest_offset;
787       break;
788     }
789     case 4:
790     {
791       guint64 *p64 = (guint64 *) pixels;
792       guint64 *s = (guint64 *) src;
793 #if 0
794       video_orc_resample_h_near_u32 (p32, s, offset_n, count);
795 #else
796       for (i = 0; i < count; i++)
797         p64[i] = s[offset_n[i]];
798 #endif
799       d = (guint64 *) dest + dest_offset;
800       break;
801     }
802     default:
803       return;
804   }
805 
806   temp = (gint32 *) scale->tmpline2;
807   taps = scale->taps_s16_4;
808   count = width * n_elems;
809 
810   if (max_taps == 2) {
811     video_orc_resample_h_2tap_u16 (d, pixels, pixels + count, taps,
812         taps + count, count);
813   } else {
814     /* first pixels with first tap to t4 */
815     video_orc_resample_h_multaps_u16 (temp, pixels, taps, count);
816     /* add other pixels with other taps to t4 */
817     video_orc_resample_h_muladdtaps_u16 (temp, 0, pixels + count, count * 2,
818         taps + count, count * 2, count, max_taps - 1);
819     /* scale and write final result */
820     video_orc_resample_scaletaps_u16 (d, temp, count);
821   }
822 }
823 
824 static void
video_scale_v_near_u8(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)825 video_scale_v_near_u8 (GstVideoScaler * scale,
826     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
827     guint n_elems)
828 {
829   if (dest != srcs[0])
830     memcpy (dest, srcs[0], n_elems * width);
831 }
832 
833 static void
video_scale_v_near_u16(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)834 video_scale_v_near_u16 (GstVideoScaler * scale,
835     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
836     guint n_elems)
837 {
838   if (dest != srcs[0])
839     memcpy (dest, srcs[0], n_elems * 2 * width);
840 }
841 
842 static void
video_scale_v_2tap_u8(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)843 video_scale_v_2tap_u8 (GstVideoScaler * scale,
844     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
845     guint n_elems)
846 {
847   gint max_taps, src_inc;
848   guint8 *s1, *s2, *d;
849   gint16 p1;
850 
851   if (scale->taps_s16 == NULL)
852 #ifdef LQ
853     make_s16_taps (scale, n_elems, SCALE_U8_LQ + 2);
854 #else
855     make_s16_taps (scale, n_elems, SCALE_U8);
856 #endif
857 
858   max_taps = scale->resampler.max_taps;
859 
860   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
861     src_inc = 2;
862   else
863     src_inc = 1;
864 
865   d = (guint8 *) dest;
866   s1 = (guint8 *) srcs[0 * src_inc];
867   s2 = (guint8 *) srcs[1 * src_inc];
868   p1 = scale->taps_s16[dest_offset * max_taps + 1];
869 
870 #ifdef LQ
871   video_orc_resample_v_2tap_u8_lq (d, s1, s2, p1, width * n_elems);
872 #else
873   video_orc_resample_v_2tap_u8 (d, s1, s2, p1, width * n_elems);
874 #endif
875 }
876 
877 static void
video_scale_v_2tap_u16(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)878 video_scale_v_2tap_u16 (GstVideoScaler * scale,
879     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
880     guint n_elems)
881 {
882   gint max_taps, src_inc;
883   guint16 *s1, *s2, *d;
884   gint16 p1;
885 
886   if (scale->taps_s16 == NULL)
887     make_s16_taps (scale, n_elems, SCALE_U16);
888 
889   max_taps = scale->resampler.max_taps;
890 
891   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
892     src_inc = 2;
893   else
894     src_inc = 1;
895 
896   d = (guint16 *) dest;
897   s1 = (guint16 *) srcs[0 * src_inc];
898   s2 = (guint16 *) srcs[1 * src_inc];
899   p1 = scale->taps_s16[dest_offset * max_taps + 1];
900 
901   video_orc_resample_v_2tap_u16 (d, s1, s2, p1, width * n_elems);
902 }
903 
#if 0
/* Disabled experiment: 4-tap horizontal scaling of 4-byte elements with a
 * dedicated ORC kernel. It gathers the source pixels for all taps into
 * tmpline1 and then filters the four tap rows in one call.
 * NOTE(review): this would not compile if enabled as-is: it uses n_elems,
 * which is not a parameter of this function, and S16_SCALE, which is not
 * defined in the part of the file visible here. */
static void
video_scale_h_4tap_8888 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width)
{
  gint16 *taps;
  gint i, max_taps, count;
  guint8 *d;
  guint32 *offset_n;
  guint32 *pixels;

  if (scale->taps_s16 == NULL)
    make_s16_taps (scale, n_elems, S16_SCALE);

  max_taps = scale->resampler.max_taps;
  offset_n = scale->offset_n;

  d = (guint8 *) dest + 4 * dest_offset;

  /* prepare the arrays FIXME, we can add this into ORC */
  count = width * max_taps;
  pixels = (guint32 *) scale->tmpline1;
  for (i = 0; i < count; i++)
    pixels[i] = ((guint32 *) src)[offset_n[i]];

  taps = scale->taps_s16_4;
  /* four bytes per element */
  count = width * 4;

  video_orc_resample_h_4tap_8 (d, pixels, pixels + width, pixels + 2 * width,
      pixels + 3 * width, taps, taps + count, taps + 2 * count,
      taps + 3 * count, count);
}
#endif
937 
938 static void
video_scale_v_4tap_u8(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)939 video_scale_v_4tap_u8 (GstVideoScaler * scale,
940     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
941     guint n_elems)
942 {
943   gint max_taps;
944   guint8 *s1, *s2, *s3, *s4, *d;
945   gint p1, p2, p3, p4, src_inc;
946   gint16 *taps;
947 
948   if (scale->taps_s16 == NULL)
949 #ifdef LQ
950     make_s16_taps (scale, n_elems, SCALE_U8_LQ);
951 #else
952     make_s16_taps (scale, n_elems, SCALE_U8);
953 #endif
954 
955   max_taps = scale->resampler.max_taps;
956   taps = scale->taps_s16 + dest_offset * max_taps;
957 
958   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
959     src_inc = 2;
960   else
961     src_inc = 1;
962 
963   d = (guint8 *) dest;
964   s1 = (guint8 *) srcs[0 * src_inc];
965   s2 = (guint8 *) srcs[1 * src_inc];
966   s3 = (guint8 *) srcs[2 * src_inc];
967   s4 = (guint8 *) srcs[3 * src_inc];
968   p1 = taps[0];
969   p2 = taps[1];
970   p3 = taps[2];
971   p4 = taps[3];
972 
973 #ifdef LQ
974   video_orc_resample_v_4tap_u8_lq (d, s1, s2, s3, s4, p1, p2, p3, p4,
975       width * n_elems);
976 #else
977   video_orc_resample_v_4tap_u8 (d, s1, s2, s3, s4, p1, p2, p3, p4,
978       width * n_elems);
979 #endif
980 }
981 
982 static void
video_scale_v_ntap_u8(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)983 video_scale_v_ntap_u8 (GstVideoScaler * scale,
984     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
985     guint n_elems)
986 {
987   gint16 *taps;
988   gint i, max_taps, count, src_inc;
989   gpointer d;
990   gint16 *temp;
991 
992   if (scale->taps_s16 == NULL)
993 #ifdef LQ
994     make_s16_taps (scale, n_elems, SCALE_U8_LQ);
995 #else
996     make_s16_taps (scale, n_elems, SCALE_U8);
997 #endif
998 
999   max_taps = scale->resampler.max_taps;
1000   taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);
1001 
1002   d = (guint32 *) dest;
1003 
1004   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
1005     src_inc = 2;
1006   else
1007     src_inc = 1;
1008 
1009   temp = (gint16 *) scale->tmpline2;
1010   count = width * n_elems;
1011 
1012 #ifdef LQ
1013   if (max_taps >= 4) {
1014     video_orc_resample_v_multaps4_u8_lq (temp, srcs[0], srcs[1 * src_inc],
1015         srcs[2 * src_inc], srcs[3 * src_inc], taps[0], taps[1], taps[2],
1016         taps[3], count);
1017     max_taps -= 4;
1018     srcs += 4 * src_inc;
1019     taps += 4;
1020   } else {
1021     gint first = (max_taps % 4);
1022 
1023     video_orc_resample_v_multaps_u8_lq (temp, srcs[0], taps[0], count);
1024     for (i = 1; i < first; i++) {
1025       video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
1026           count);
1027     }
1028     max_taps -= first;
1029     srcs += first * src_inc;
1030     taps += first;
1031   }
1032   while (max_taps > 4) {
1033     if (max_taps >= 8) {
1034       video_orc_resample_v_muladdtaps4_u8_lq (temp, srcs[0], srcs[1 * src_inc],
1035           srcs[2 * src_inc], srcs[3 * src_inc], taps[0], taps[1], taps[2],
1036           taps[3], count);
1037       max_taps -= 4;
1038       srcs += 4 * src_inc;
1039       taps += 4;
1040     } else {
1041       for (i = 0; i < max_taps - 4; i++)
1042         video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
1043             count);
1044       srcs += (max_taps - 4) * src_inc;
1045       taps += (max_taps - 4);
1046       max_taps = 4;
1047     }
1048   }
1049   if (max_taps == 4) {
1050     video_orc_resample_v_muladdscaletaps4_u8_lq (d, srcs[0], srcs[1 * src_inc],
1051         srcs[2 * src_inc], srcs[3 * src_inc], temp, taps[0], taps[1], taps[2],
1052         taps[3], count);
1053   } else {
1054     for (i = 0; i < max_taps; i++)
1055       video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
1056           count);
1057     video_orc_resample_scaletaps_u8_lq (d, temp, count);
1058   }
1059 
1060 #else
1061   video_orc_resample_v_multaps_u8 (temp, srcs[0], taps[0], count);
1062   for (i = 1; i < max_taps; i++) {
1063     video_orc_resample_v_muladdtaps_u8 (temp, srcs[i * src_inc], taps[i],
1064         count);
1065   }
1066   video_orc_resample_scaletaps_u8 (d, temp, count);
1067 #endif
1068 }
1069 
1070 static void
video_scale_v_ntap_u16(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)1071 video_scale_v_ntap_u16 (GstVideoScaler * scale,
1072     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
1073     guint n_elems)
1074 {
1075   gint16 *taps;
1076   gint i, max_taps, count, src_inc;
1077   gpointer d;
1078   gint32 *temp;
1079 
1080   if (scale->taps_s16 == NULL)
1081     make_s16_taps (scale, n_elems, SCALE_U16);
1082 
1083   max_taps = scale->resampler.max_taps;
1084   taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);
1085 
1086   d = (guint16 *) dest;
1087 
1088   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
1089     src_inc = 2;
1090   else
1091     src_inc = 1;
1092 
1093   temp = (gint32 *) scale->tmpline2;
1094   count = width * n_elems;
1095 
1096   video_orc_resample_v_multaps_u16 (temp, srcs[0], taps[0], count);
1097   for (i = 1; i < max_taps; i++) {
1098     video_orc_resample_v_muladdtaps_u16 (temp, srcs[i * src_inc], taps[i],
1099         count);
1100   }
1101   video_orc_resample_scaletaps_u16 (d, temp, count);
1102 }
1103 
1104 static gint
get_y_offset(GstVideoFormat format)1105 get_y_offset (GstVideoFormat format)
1106 {
1107   switch (format) {
1108     case GST_VIDEO_FORMAT_YUY2:
1109     case GST_VIDEO_FORMAT_YVYU:
1110       return 0;
1111     default:
1112     case GST_VIDEO_FORMAT_UYVY:
1113       return 1;
1114   }
1115 }
1116 
1117 /**
1118  * gst_video_scaler_combine_packed_YUV: (skip)
1119  * @y_scale: a scaler for the Y component
1120  * @uv_scale: a scaler for the U and V components
1121  * @in_format: the input video format
1122  * @out_format: the output video format
1123  *
1124  * Combine a scaler for Y and UV into one scaler for the packed @format.
1125  *
1126  * Returns: a new horizontal videoscaler for @format.
1127  *
1128  * Since: 1.6
1129  */
1130 GstVideoScaler *
gst_video_scaler_combine_packed_YUV(GstVideoScaler * y_scale,GstVideoScaler * uv_scale,GstVideoFormat in_format,GstVideoFormat out_format)1131 gst_video_scaler_combine_packed_YUV (GstVideoScaler * y_scale,
1132     GstVideoScaler * uv_scale, GstVideoFormat in_format,
1133     GstVideoFormat out_format)
1134 {
1135   GstVideoScaler *scale;
1136   GstVideoResampler *resampler;
1137   guint i, out_size, max_taps, n_phases;
1138   gdouble *taps;
1139   guint32 *offset, *phase;
1140 
1141   g_return_val_if_fail (y_scale != NULL, NULL);
1142   g_return_val_if_fail (uv_scale != NULL, NULL);
1143   g_return_val_if_fail (uv_scale->resampler.max_taps ==
1144       y_scale->resampler.max_taps, NULL);
1145 
1146   scale = g_slice_new0 (GstVideoScaler);
1147 
1148   scale->method = y_scale->method;
1149   scale->flags = y_scale->flags;
1150   scale->merged = TRUE;
1151 
1152   resampler = &scale->resampler;
1153 
1154   out_size = GST_ROUND_UP_4 (y_scale->resampler.out_size * 2);
1155   max_taps = y_scale->resampler.max_taps;
1156   n_phases = out_size;
1157   offset = g_malloc (sizeof (guint32) * out_size);
1158   phase = g_malloc (sizeof (guint32) * n_phases);
1159   taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);
1160 
1161   resampler->in_size = y_scale->resampler.in_size * 2;
1162   resampler->out_size = out_size;
1163   resampler->max_taps = max_taps;
1164   resampler->n_phases = n_phases;
1165   resampler->offset = offset;
1166   resampler->phase = phase;
1167   resampler->n_taps = g_malloc (sizeof (guint32) * out_size);
1168   resampler->taps = taps;
1169 
1170   scale->in_y_offset = get_y_offset (in_format);
1171   scale->out_y_offset = get_y_offset (out_format);
1172   scale->inc = y_scale->inc;
1173 
1174   for (i = 0; i < out_size; i++) {
1175     gint ic;
1176 
1177     if ((i & 1) == scale->out_y_offset) {
1178       ic = MIN (i / 2, y_scale->resampler.out_size - 1);
1179       offset[i] = y_scale->resampler.offset[ic] * 2 + scale->in_y_offset;
1180       memcpy (taps + i * max_taps, y_scale->resampler.taps +
1181           y_scale->resampler.phase[ic] * max_taps, max_taps * sizeof (gdouble));
1182     } else {
1183       ic = MIN (i / 4, uv_scale->resampler.out_size - 1);
1184       offset[i] = uv_scale->resampler.offset[ic] * 4 + (i & 3);
1185       memcpy (taps + i * max_taps, uv_scale->resampler.taps +
1186           uv_scale->resampler.phase[ic] * max_taps,
1187           max_taps * sizeof (gdouble));
1188     }
1189     phase[i] = i;
1190   }
1191 
1192   scaler_dump (scale);
1193 
1194   return scale;
1195 }
1196 
1197 static gboolean
get_functions(GstVideoScaler * hscale,GstVideoScaler * vscale,GstVideoFormat format,GstVideoScalerHFunc * hfunc,GstVideoScalerVFunc * vfunc,gint * n_elems,guint * width,gint * bits)1198 get_functions (GstVideoScaler * hscale, GstVideoScaler * vscale,
1199     GstVideoFormat format,
1200     GstVideoScalerHFunc * hfunc, GstVideoScalerVFunc * vfunc,
1201     gint * n_elems, guint * width, gint * bits)
1202 {
1203   gboolean mono = FALSE;
1204 
1205   switch (format) {
1206     case GST_VIDEO_FORMAT_GRAY8:
1207       *bits = 8;
1208       *n_elems = 1;
1209       mono = TRUE;
1210       break;
1211     case GST_VIDEO_FORMAT_YUY2:
1212     case GST_VIDEO_FORMAT_YVYU:
1213     case GST_VIDEO_FORMAT_UYVY:
1214       *bits = 8;
1215       *n_elems = 1;
1216       *width = GST_ROUND_UP_4 (*width * 2);
1217       break;
1218     case GST_VIDEO_FORMAT_RGB:
1219     case GST_VIDEO_FORMAT_BGR:
1220     case GST_VIDEO_FORMAT_v308:
1221     case GST_VIDEO_FORMAT_IYU2:
1222       *bits = 8;
1223       *n_elems = 3;
1224       break;
1225     case GST_VIDEO_FORMAT_AYUV:
1226     case GST_VIDEO_FORMAT_RGBx:
1227     case GST_VIDEO_FORMAT_BGRx:
1228     case GST_VIDEO_FORMAT_xRGB:
1229     case GST_VIDEO_FORMAT_xBGR:
1230     case GST_VIDEO_FORMAT_RGBA:
1231     case GST_VIDEO_FORMAT_BGRA:
1232     case GST_VIDEO_FORMAT_ARGB:
1233     case GST_VIDEO_FORMAT_ABGR:
1234       *bits = 8;
1235       *n_elems = 4;
1236       break;
1237     case GST_VIDEO_FORMAT_ARGB64:
1238     case GST_VIDEO_FORMAT_AYUV64:
1239       *bits = 16;
1240       *n_elems = 4;
1241       break;
1242     case GST_VIDEO_FORMAT_GRAY16_LE:
1243     case GST_VIDEO_FORMAT_GRAY16_BE:
1244       *bits = 16;
1245       *n_elems = 1;
1246       mono = TRUE;
1247       break;
1248     case GST_VIDEO_FORMAT_NV12:
1249     case GST_VIDEO_FORMAT_NV16:
1250     case GST_VIDEO_FORMAT_NV21:
1251     case GST_VIDEO_FORMAT_NV24:
1252     case GST_VIDEO_FORMAT_NV61:
1253       *bits = 8;
1254       *n_elems = 2;
1255       break;
1256     default:
1257       return FALSE;
1258   }
1259   if (*bits == 8) {
1260     switch (hscale ? hscale->resampler.max_taps : 0) {
1261       case 0:
1262         break;
1263       case 1:
1264         if (*n_elems == 1)
1265           *hfunc = video_scale_h_near_u8;
1266         else if (*n_elems == 2)
1267           *hfunc = video_scale_h_near_u16;
1268         else if (*n_elems == 3)
1269           *hfunc = video_scale_h_near_3u8;
1270         else if (*n_elems == 4)
1271           *hfunc = video_scale_h_near_u32;
1272         break;
1273       case 2:
1274         if (*n_elems == 1 && mono)
1275           *hfunc = video_scale_h_2tap_1u8;
1276         else if (*n_elems == 4)
1277           *hfunc = video_scale_h_2tap_4u8;
1278         else
1279           *hfunc = video_scale_h_ntap_u8;
1280         break;
1281       default:
1282         *hfunc = video_scale_h_ntap_u8;
1283         break;
1284     }
1285     switch (vscale ? vscale->resampler.max_taps : 0) {
1286       case 0:
1287         break;
1288       case 1:
1289         *vfunc = video_scale_v_near_u8;
1290         break;
1291       case 2:
1292         *vfunc = video_scale_v_2tap_u8;
1293         break;
1294       case 4:
1295         *vfunc = video_scale_v_4tap_u8;
1296         break;
1297       default:
1298         *vfunc = video_scale_v_ntap_u8;
1299         break;
1300     }
1301   } else if (*bits == 16) {
1302     switch (hscale ? hscale->resampler.max_taps : 0) {
1303       case 0:
1304         break;
1305       case 1:
1306         if (*n_elems == 1)
1307           *hfunc = video_scale_h_near_u16;
1308         else
1309           *hfunc = video_scale_h_near_u64;
1310         break;
1311       default:
1312         *hfunc = video_scale_h_ntap_u16;
1313         break;
1314     }
1315     switch (vscale ? vscale->resampler.max_taps : 0) {
1316       case 0:
1317         break;
1318       case 1:
1319         *vfunc = video_scale_v_near_u16;
1320         break;
1321       case 2:
1322         *vfunc = video_scale_v_2tap_u16;
1323         break;
1324       default:
1325         *vfunc = video_scale_v_ntap_u16;
1326         break;
1327     }
1328   }
1329   return TRUE;
1330 }
1331 
1332 /**
1333  * gst_video_scaler_horizontal:
1334  * @scale: a #GstVideoScaler
1335  * @format: a #GstVideoFormat for @src and @dest
1336  * @src: source pixels
1337  * @dest: destination pixels
1338  * @dest_offset: the horizontal destination offset
1339  * @width: the number of pixels to scale
1340  *
1341  * Horizontally scale the pixels in @src to @dest, starting from @dest_offset
1342  * for @width samples.
1343  */
1344 void
gst_video_scaler_horizontal(GstVideoScaler * scale,GstVideoFormat format,gpointer src,gpointer dest,guint dest_offset,guint width)1345 gst_video_scaler_horizontal (GstVideoScaler * scale, GstVideoFormat format,
1346     gpointer src, gpointer dest, guint dest_offset, guint width)
1347 {
1348   gint n_elems, bits;
1349   GstVideoScalerHFunc func = NULL;
1350 
1351   g_return_if_fail (scale != NULL);
1352   g_return_if_fail (src != NULL);
1353   g_return_if_fail (dest != NULL);
1354   g_return_if_fail (dest_offset + width <= scale->resampler.out_size);
1355 
1356   if (!get_functions (scale, NULL, format, &func, NULL, &n_elems, &width, &bits)
1357       || func == NULL)
1358     goto no_func;
1359 
1360   if (scale->tmpwidth < width)
1361     realloc_tmplines (scale, n_elems, width);
1362 
1363   func (scale, src, dest, dest_offset, width, n_elems);
1364   return;
1365 
1366 no_func:
1367   {
1368     GST_WARNING ("no scaler function for format");
1369   }
1370 }
1371 
1372 /**
1373  * gst_video_scaler_vertical:
1374  * @scale: a #GstVideoScaler
1375  * @format: a #GstVideoFormat for @srcs and @dest
1376  * @src_lines: source pixels lines
1377  * @dest: destination pixels
1378  * @dest_offset: the vertical destination offset
1379  * @width: the number of pixels to scale
1380  *
1381  * Vertically combine @width pixels in the lines in @src_lines to @dest.
1382  * @dest is the location of the target line at @dest_offset and
1383  * @srcs are the input lines for @dest_offset, as obtained with
1384  * gst_video_scaler_get_info().
1385  */
1386 void
gst_video_scaler_vertical(GstVideoScaler * scale,GstVideoFormat format,gpointer src_lines[],gpointer dest,guint dest_offset,guint width)1387 gst_video_scaler_vertical (GstVideoScaler * scale, GstVideoFormat format,
1388     gpointer src_lines[], gpointer dest, guint dest_offset, guint width)
1389 {
1390   gint n_elems, bits;
1391   GstVideoScalerVFunc func = NULL;
1392 
1393   g_return_if_fail (scale != NULL);
1394   g_return_if_fail (src_lines != NULL);
1395   g_return_if_fail (dest != NULL);
1396   g_return_if_fail (dest_offset < scale->resampler.out_size);
1397 
1398   if (!get_functions (NULL, scale, format, NULL, &func, &n_elems, &width, &bits)
1399       || func == NULL)
1400     goto no_func;
1401 
1402   if (scale->tmpwidth < width)
1403     realloc_tmplines (scale, n_elems, width);
1404 
1405   func (scale, src_lines, dest, dest_offset, width, n_elems);
1406 
1407   return;
1408 
1409 no_func:
1410   {
1411     GST_WARNING ("no scaler function for format");
1412   }
1413 }
1414 
1415 
1416 /**
1417  * gst_video_scaler_2d:
1418  * @hscale: a horzontal #GstVideoScaler
1419  * @vscale: a vertical #GstVideoScaler
1420  * @format: a #GstVideoFormat for @srcs and @dest
1421  * @src: source pixels
1422  * @src_stride: source pixels stride
1423  * @dest: destination pixels
1424  * @dest_stride: destination pixels stride
1425  * @x: the horizontal destination offset
1426  * @y: the vertical destination offset
1427  * @width: the number of output pixels to scale
1428  * @height: the number of output lines to scale
1429  *
1430  * Scale a rectangle of pixels in @src with @src_stride to @dest with
1431  * @dest_stride using the horizontal scaler @hscaler and the vertical
1432  * scaler @vscale.
1433  *
1434  * One or both of @hscale and @vscale can be NULL to only perform scaling in
1435  * one dimension or do a copy without scaling.
1436  *
1437  * @x and @y are the coordinates in the destination image to process.
1438  */
1439 void
gst_video_scaler_2d(GstVideoScaler * hscale,GstVideoScaler * vscale,GstVideoFormat format,gpointer src,gint src_stride,gpointer dest,gint dest_stride,guint x,guint y,guint width,guint height)1440 gst_video_scaler_2d (GstVideoScaler * hscale, GstVideoScaler * vscale,
1441     GstVideoFormat format, gpointer src, gint src_stride,
1442     gpointer dest, gint dest_stride, guint x, guint y,
1443     guint width, guint height)
1444 {
1445   gint n_elems, bits;
1446   GstVideoScalerHFunc hfunc = NULL;
1447   GstVideoScalerVFunc vfunc = NULL;
1448   gint i;
1449 
1450   g_return_if_fail (src != NULL);
1451   g_return_if_fail (dest != NULL);
1452 
1453   if (!get_functions (hscale, vscale, format, &hfunc, &vfunc, &n_elems, &width,
1454           &bits))
1455     goto no_func;
1456 
1457 #define LINE(s,ss,i)  ((guint8 *)(s) + ((i) * (ss)))
1458 #define TMP_LINE(s,i,v) ((guint8 *)(s->tmpline1) + (((i) % (v)) * (sizeof (gint32) * width * n_elems)))
1459 
1460   if (vscale == NULL) {
1461     if (hscale == NULL) {
1462       guint xo, xw;
1463       guint8 *s, *d;
1464 
1465       xo = x * n_elems;
1466       xw = width * n_elems * (bits / 8);
1467 
1468       s = LINE (src, src_stride, y) + xo;
1469       d = LINE (dest, dest_stride, y) + xo;
1470 
1471       /* no scaling, do memcpy */
1472       for (i = y; i < height; i++) {
1473         memcpy (d, s, xw);
1474         d += dest_stride;
1475         s += src_stride;
1476       }
1477     } else {
1478       if (hscale->tmpwidth < width)
1479         realloc_tmplines (hscale, n_elems, width);
1480 
1481       /* only horizontal scaling */
1482       for (i = y; i < height; i++) {
1483         hfunc (hscale, LINE (src, src_stride, i), LINE (dest, dest_stride, i),
1484             x, width, n_elems);
1485       }
1486     }
1487   } else {
1488     guint v_taps;
1489     gpointer *lines;
1490 
1491     if (vscale->tmpwidth < width)
1492       realloc_tmplines (vscale, n_elems, width);
1493 
1494     v_taps = vscale->resampler.max_taps;
1495     if (vscale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
1496       v_taps *= 2;
1497 
1498     lines = g_alloca (v_taps * sizeof (gpointer));
1499 
1500     if (hscale == NULL) {
1501       /* only vertical scaling */
1502       for (i = y; i < height; i++) {
1503         guint in, j;
1504 
1505         in = vscale->resampler.offset[i];
1506         for (j = 0; j < v_taps; j++)
1507           lines[j] = LINE (src, src_stride, in + j);
1508 
1509         vfunc (vscale, lines, LINE (dest, dest_stride, i), i, width, n_elems);
1510       }
1511     } else {
1512       gint s1, s2;
1513 
1514       if (hscale->tmpwidth < width)
1515         realloc_tmplines (hscale, n_elems, width);
1516 
1517       s1 = width * vscale->resampler.offset[height - 1];
1518       s2 = width * height;
1519 
1520       if (s1 <= s2) {
1521         gint tmp_in = vscale->resampler.offset[y];
1522 
1523         for (i = y; i < height; i++) {
1524           guint in, j;
1525 
1526           in = vscale->resampler.offset[i];
1527           while (tmp_in < in)
1528             tmp_in++;
1529           while (tmp_in < in + v_taps) {
1530             hfunc (hscale, LINE (src, src_stride, tmp_in), TMP_LINE (vscale,
1531                     tmp_in, v_taps), x, width, n_elems);
1532             tmp_in++;
1533           }
1534           for (j = 0; j < v_taps; j++)
1535             lines[j] = TMP_LINE (vscale, in + j, v_taps);
1536 
1537           vfunc (vscale, lines, LINE (dest, dest_stride, i), i, width, n_elems);
1538         }
1539       } else {
1540         guint vx, vw, w1, ws;
1541         guint h_taps;
1542 
1543         h_taps = hscale->resampler.max_taps;
1544         w1 = x + width - 1;
1545         ws = hscale->resampler.offset[w1];
1546 
1547         /* we need to estimate the area that we first need to scale in the
1548          * vertical direction. Scale x and width to find the lower bound and
1549          * overshoot the width to find the upper bound */
1550         vx = (hscale->inc * x) >> 16;
1551         vx = MIN (vx, hscale->resampler.offset[x]);
1552         vw = (hscale->inc * (x + width)) >> 16;
1553         if (hscale->merged) {
1554           if ((w1 & 1) == hscale->out_y_offset)
1555             vw = MAX (vw, ws + (2 * h_taps));
1556           else
1557             vw = MAX (vw, ws + (4 * h_taps));
1558         } else {
1559           vw = MAX (vw, ws + h_taps);
1560         }
1561         vw += 1;
1562         /* but clamp to max size */
1563         vw = MIN (vw, hscale->resampler.in_size);
1564 
1565         if (vscale->tmpwidth < vw)
1566           realloc_tmplines (vscale, n_elems, vw);
1567 
1568         for (i = y; i < height; i++) {
1569           guint in, j;
1570 
1571           in = vscale->resampler.offset[i];
1572           for (j = 0; j < v_taps; j++)
1573             lines[j] = LINE (src, src_stride, in + j) + vx * n_elems;
1574 
1575           vfunc (vscale, lines, TMP_LINE (vscale, 0, v_taps) + vx * n_elems, i,
1576               vw - vx, n_elems);
1577 
1578           hfunc (hscale, TMP_LINE (vscale, 0, v_taps), LINE (dest, dest_stride,
1579                   i), x, width, n_elems);
1580         }
1581       }
1582     }
1583   }
1584   return;
1585 
1586 no_func:
1587   {
1588     GST_WARNING ("no scaler function for format");
1589   }
1590 }
1591