1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 CCL_NAMESPACE_BEGIN
18 
/* Access pass number `pass` of the pixel that `buffer` points to. Consecutive passes of the same
 * pixel are `pass_stride` elements apart; a variable named `pass_stride` has to be visible at the
 * place where the macro is expanded. */
#define ccl_get_feature(buffer, pass) ((buffer)[(pass) * pass_stride])
20 
/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y). pixel_buffer always
 * points to the current pixel in the first pass. Repeat the loop for every secondary frame if
 * there are any. */
/* Opens three nested loops (frame, row, column) that must be closed with END_FOR_PIXEL_WINDOW.
 * The expansion site has to provide: tile_info, buffer, buffer_w, frame_stride, rect, low, high,
 * and the writable variables pixel and pixel_buffer. */
#define FOR_PIXEL_WINDOW \
  for (int frame = 0; frame < tile_info->num_frames; ++frame) { \
    pixel.z = tile_info->frames[frame]; \
    pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
                   frame * frame_stride; \
    for (pixel.y = low.y; pixel.y < high.y; ++pixel.y) { \
      for (pixel.x = low.x; pixel.x < high.x; ++pixel.x, ++pixel_buffer) {

/* Closes the loops opened by FOR_PIXEL_WINDOW. After the column loop has walked one row of the
 * window, pixel_buffer is moved forward by the rest of the row (buffer_w minus the window width)
 * so that it lands on column low.x of the following row. */
#define END_FOR_PIXEL_WINDOW \
      } \
      pixel_buffer += buffer_w - (high.x - low.x); \
    } \
  }
37 
filter_get_features(int3 pixel,const ccl_global float * ccl_restrict buffer,float * features,bool use_time,const float * ccl_restrict mean,int pass_stride)38 ccl_device_inline void filter_get_features(int3 pixel,
39                                            const ccl_global float *ccl_restrict buffer,
40                                            float *features,
41                                            bool use_time,
42                                            const float *ccl_restrict mean,
43                                            int pass_stride)
44 {
45   features[0] = pixel.x;
46   features[1] = pixel.y;
47   features[2] = fabsf(ccl_get_feature(buffer, 0));
48   features[3] = ccl_get_feature(buffer, 1);
49   features[4] = ccl_get_feature(buffer, 2);
50   features[5] = ccl_get_feature(buffer, 3);
51   features[6] = ccl_get_feature(buffer, 4);
52   features[7] = ccl_get_feature(buffer, 5);
53   features[8] = ccl_get_feature(buffer, 6);
54   features[9] = ccl_get_feature(buffer, 7);
55   if (use_time) {
56     features[10] = pixel.z;
57   }
58   if (mean) {
59     for (int i = 0; i < (use_time ? 11 : 10); i++) {
60       features[i] -= mean[i];
61     }
62   }
63 }
64 
filter_get_feature_scales(int3 pixel,const ccl_global float * ccl_restrict buffer,float * scales,bool use_time,const float * ccl_restrict mean,int pass_stride)65 ccl_device_inline void filter_get_feature_scales(int3 pixel,
66                                                  const ccl_global float *ccl_restrict buffer,
67                                                  float *scales,
68                                                  bool use_time,
69                                                  const float *ccl_restrict mean,
70                                                  int pass_stride)
71 {
72   scales[0] = fabsf(pixel.x - mean[0]);
73   scales[1] = fabsf(pixel.y - mean[1]);
74   scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
75   scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
76                                       ccl_get_feature(buffer, 2) - mean[4],
77                                       ccl_get_feature(buffer, 3) - mean[5]));
78   scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]);
79   scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7],
80                                       ccl_get_feature(buffer, 6) - mean[8],
81                                       ccl_get_feature(buffer, 7) - mean[9]));
82   if (use_time) {
83     scales[6] = fabsf(pixel.z - mean[10]);
84   }
85 }
86 
filter_calculate_scale(float * scale,bool use_time)87 ccl_device_inline void filter_calculate_scale(float *scale, bool use_time)
88 {
89   scale[0] = 1.0f / max(scale[0], 0.01f);
90   scale[1] = 1.0f / max(scale[1], 0.01f);
91   scale[2] = 1.0f / max(scale[2], 0.01f);
92   if (use_time) {
93     scale[10] = 1.0f / max(scale[6], 0.01f);
94   }
95   scale[6] = 1.0f / max(scale[4], 0.01f);
96   scale[7] = scale[8] = scale[9] = 1.0f / max(sqrtf(scale[5]), 0.01f);
97   scale[3] = scale[4] = scale[5] = 1.0f / max(sqrtf(scale[3]), 0.01f);
98 }
99 
filter_get_color(const ccl_global float * ccl_restrict buffer,int pass_stride)100 ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer,
101                                           int pass_stride)
102 {
103   return make_float3(
104       ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10));
105 }
106 
design_row_add(float * design_row,int rank,const ccl_global float * ccl_restrict transform,int stride,int row,float feature,int transform_row_stride)107 ccl_device_inline void design_row_add(float *design_row,
108                                       int rank,
109                                       const ccl_global float *ccl_restrict transform,
110                                       int stride,
111                                       int row,
112                                       float feature,
113                                       int transform_row_stride)
114 {
115   for (int i = 0; i < rank; i++) {
116     design_row[1 + i] += transform[(row * transform_row_stride + i) * stride] * feature;
117   }
118 }
119 
120 /* Fill the design row. */
filter_get_design_row_transform(int3 p_pixel,const ccl_global float * ccl_restrict p_buffer,int3 q_pixel,const ccl_global float * ccl_restrict q_buffer,int pass_stride,int rank,float * design_row,const ccl_global float * ccl_restrict transform,int stride,bool use_time)121 ccl_device_inline void filter_get_design_row_transform(
122     int3 p_pixel,
123     const ccl_global float *ccl_restrict p_buffer,
124     int3 q_pixel,
125     const ccl_global float *ccl_restrict q_buffer,
126     int pass_stride,
127     int rank,
128     float *design_row,
129     const ccl_global float *ccl_restrict transform,
130     int stride,
131     bool use_time)
132 {
133   int num_features = use_time ? 11 : 10;
134 
135   design_row[0] = 1.0f;
136   math_vector_zero(design_row + 1, rank);
137 
138 #define DESIGN_ROW_ADD(I, F) \
139   design_row_add(design_row, rank, transform, stride, I, F, num_features);
140   DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x);
141   DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y);
142   DESIGN_ROW_ADD(2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
143   DESIGN_ROW_ADD(3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
144   DESIGN_ROW_ADD(4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
145   DESIGN_ROW_ADD(5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
146   DESIGN_ROW_ADD(6, ccl_get_feature(q_buffer, 4) - ccl_get_feature(p_buffer, 4));
147   DESIGN_ROW_ADD(7, ccl_get_feature(q_buffer, 5) - ccl_get_feature(p_buffer, 5));
148   DESIGN_ROW_ADD(8, ccl_get_feature(q_buffer, 6) - ccl_get_feature(p_buffer, 6));
149   DESIGN_ROW_ADD(9, ccl_get_feature(q_buffer, 7) - ccl_get_feature(p_buffer, 7));
150   if (use_time) {
151     DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z)
152   }
153 #undef DESIGN_ROW_ADD
154 }
155 
156 CCL_NAMESPACE_END
157