1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "lp_bld_type.h"
30 #include "lp_bld_arit.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_swizzle.h"
33 #include "lp_bld_quad.h"
34 #include "lp_bld_pack.h"
35 
36 
37 static const unsigned char
38 swizzle_left[4] = {
39    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_LEFT,
40    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_LEFT
41 };
42 
43 static const unsigned char
44 swizzle_right[4] = {
45    LP_BLD_QUAD_TOP_RIGHT,    LP_BLD_QUAD_TOP_RIGHT,
46    LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT
47 };
48 
49 static const unsigned char
50 swizzle_top[4] = {
51    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT,
52    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT
53 };
54 
55 static const unsigned char
56 swizzle_bottom[4] = {
57    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT,
58    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT
59 };
60 
61 
62 LLVMValueRef
lp_build_ddx(struct lp_build_context * bld,LLVMValueRef a)63 lp_build_ddx(struct lp_build_context *bld,
64              LLVMValueRef a)
65 {
66    LLVMValueRef a_left  = lp_build_swizzle_aos(bld, a, swizzle_left);
67    LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
68    return lp_build_sub(bld, a_right, a_left);
69 }
70 
71 
72 LLVMValueRef
lp_build_ddy(struct lp_build_context * bld,LLVMValueRef a)73 lp_build_ddy(struct lp_build_context *bld,
74              LLVMValueRef a)
75 {
76    LLVMValueRef a_top    = lp_build_swizzle_aos(bld, a, swizzle_top);
77    LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
78    return lp_build_sub(bld, a_bottom, a_top);
79 }
80 
81 /*
82  * Helper for building packed ddx/ddy vector for one coord (scalar per quad
83  * values). The vector will look like this (8-wide):
84  * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____
85  * This only requires one shuffle instead of two for more straightforward packing.
86  */
87 LLVMValueRef
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context * bld,LLVMValueRef a)88 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
89                                  LLVMValueRef a)
90 {
91    struct gallivm_state *gallivm = bld->gallivm;
92    LLVMBuilderRef builder = gallivm->builder;
93    LLVMValueRef vec1, vec2;
94 
95    /* use aos swizzle helper */
96 
97    static const unsigned char swizzle1[] = { /* no-op swizzle */
98       LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE,
99       LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE
100    };
101    static const unsigned char swizzle2[] = {
102       LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE,
103       LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE
104    };
105 
106    vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
107    vec2 = lp_build_swizzle_aos(bld, a, swizzle2);
108 
109    if (bld->type.floating)
110       return LLVMBuildFSub(builder, vec2, vec1, "ddxddy");
111    else
112       return LLVMBuildSub(builder, vec2, vec1, "ddxddy");
113 }
114 
115 
116 /*
117  * Helper for building packed ddx/ddy vector for one coord (scalar per quad
118  * values). The vector will look like this (8-wide):
119  * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
120  * This only needs 2 (v)shufps.
121  */
122 LLVMValueRef
lp_build_packed_ddx_ddy_twocoord(struct lp_build_context * bld,LLVMValueRef a,LLVMValueRef b)123 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
124                                  LLVMValueRef a, LLVMValueRef b)
125 {
126    struct gallivm_state *gallivm = bld->gallivm;
127    LLVMBuilderRef builder = gallivm->builder;
128    LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4];
129    LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4];
130    LLVMValueRef vec1, vec2;
131    unsigned length, num_quads, i;
132 
133    /* XXX: do hsub version */
134    length = bld->type.length;
135    num_quads = length / 4;
136    for (i = 0; i < num_quads; i++) {
137       unsigned s1 = 4 * i;
138       unsigned s2 = 4 * i + length;
139       shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
140       shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
141       shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
142       shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
143       shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
144       shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
145       shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
146       shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
147    }
148    vec1 = LLVMBuildShuffleVector(builder, a, b,
149                                  LLVMConstVector(shuffles1, length), "");
150    vec2 = LLVMBuildShuffleVector(builder, a, b,
151                                  LLVMConstVector(shuffles2, length), "");
152    if (bld->type.floating)
153       return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
154    else
155       return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
156 }
157 
158 
159 /**
160  * Twiddle from quad format to row format
161  *
162  *   src0      src1
163  * ######### #########      #################
164  * # 0 | 1 # # 4 | 5 #      # 0 | 1 | 4 | 5 # src0
165  * #---+---# #---+---#  ->  #################
166  * # 2 | 3 # # 6 | 7 #      # 2 | 3 | 6 | 7 # src1
167  * ######### #########      #################
168  *
169  */
170 void
lp_bld_quad_twiddle(struct gallivm_state * gallivm,struct lp_type lp_dst_type,const LLVMValueRef * src,unsigned src_count,LLVMValueRef * dst)171 lp_bld_quad_twiddle(struct gallivm_state *gallivm,
172                     struct lp_type lp_dst_type,
173                     const LLVMValueRef* src,
174                     unsigned src_count,
175                     LLVMValueRef* dst)
176 {
177    LLVMBuilderRef builder = gallivm->builder;
178    LLVMTypeRef dst_type_ref;
179    LLVMTypeRef type2_ref;
180    struct lp_type type2;
181    unsigned i;
182 
183    assert((src_count % 2) == 0);
184 
185    /* Create a type with only 2 elements */
186    type2 = lp_dst_type;
187    type2.width = (lp_dst_type.width * lp_dst_type.length) / 2;
188    type2.length = 2;
189    type2.floating = 0;
190 
191    type2_ref = lp_build_vec_type(gallivm, type2);
192    dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type);
193 
194    for (i = 0; i < src_count; i += 2) {
195       LLVMValueRef src0, src1;
196 
197       src0 = LLVMBuildBitCast(builder, src[i + 0], type2_ref, "");
198       src1 = LLVMBuildBitCast(builder, src[i + 1], type2_ref, "");
199 
200       dst[i + 0] = lp_build_interleave2(gallivm, type2, src0, src1, 0);
201       dst[i + 1] = lp_build_interleave2(gallivm, type2, src0, src1, 1);
202 
203       dst[i + 0] = LLVMBuildBitCast(builder, dst[i + 0], dst_type_ref, "");
204       dst[i + 1] = LLVMBuildBitCast(builder, dst[i + 1], dst_type_ref, "");
205    }
206 }
207