1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // REQUIRES: powerpc-registered-target
3 
4 // RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
5 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s
6 // RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
7 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s
8 
9 #include <pmmintrin.h>
10 
11 __m128d resd, md1, md2;
12 __m128 res, m1, m2;
13 __m128i resi, mi;
14 double *d;
15 
16 void __attribute__((noinline))
test_pmmintrin()17 test_pmmintrin() {
18   resd = _mm_addsub_pd(md1, md2);
19   res = _mm_addsub_ps(m1, m2);
20   resd = _mm_hadd_pd(md1, md2);
21   res = _mm_hadd_ps(m1, m2);
22   resd = _mm_hsub_pd(md1, md2);
23   res = _mm_hsub_ps(m1, m2);
24   resi = _mm_lddqu_si128(&mi);
25   resd = _mm_loaddup_pd(d);
26   resd = _mm_movedup_pd(md1);
27   res = _mm_movehdup_ps(m1);
28   res = _mm_moveldup_ps(m1);
29 }
30 
31 // CHECK-LABEL: @test_pmmintrin
32 
33 // CHECK: define available_externally <2 x double> @_mm_addsub_pd(<2 x double> [[REG1:[0-9a-zA-Z_%.]+]], <2 x double> [[REG2:[0-9a-zA-Z_%.]+]])
34 // CHECK: store <2 x double> [[REG1]], <2 x double>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
35 // CHECK-NEXT: store <2 x double> [[REG2]], <2 x double>* [[REG4:[0-9a-zA-Z_%.]+]], align 16
36 // CHECK-NEXT: store <2 x double> <double -0.000000e+00, double 0.000000e+00>, <2 x double>* [[REG5:[0-9a-zA-Z_%.]+]], align 16
37 // CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG4]], align 16
38 // CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_xor(double vector[2], double vector[2])(<2 x double> [[REG7]], <2 x double> <double -0.000000e+00, double 0.000000e+00>)
39 // CHECK-NEXT: store <2 x double> [[REG8]], <2 x double>* [[REG6:[0-9a-zA-Z_%.]+]], align 16
40 // CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG3]], align 16
41 // CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG6]], align 16
42 // CHECK-NEXT: [[REG11:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_add(double vector[2], double vector[2])(<2 x double> [[REG9]], <2 x double> [[REG10]])
43 // CHECK-NEXT: ret <2 x double> [[REG11]]
44 
45 // CHECK: define available_externally <4 x float> @_mm_addsub_ps(<4 x float> [[REG12:[0-9a-zA-Z_%.]+]], <4 x float> [[REG13:[0-9a-zA-Z_%.]+]])
46 // CHECK: store <4 x float> [[REG12]], <4 x float>* [[REG14:[0-9a-zA-Z_%.]+]], align 16
47 // CHECK-NEXT: store <4 x float> [[REG13]], <4 x float>* [[REG15:[0-9a-zA-Z_%.]+]], align 16
48 // CHECK-NEXT: store <4 x float> <float -0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00>, <4 x float>* [[REG16:[0-9a-zA-Z_%.]+]], align 16
49 // CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG15]], align 16
50 // CHECK-NEXT: [[REG19:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_xor(float vector[4], float vector[4])(<4 x float> [[REG18]], <4 x float> <float -0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00>)
51 // CHECK-NEXT: store <4 x float> [[REG19]], <4 x float>* [[REG17:[0-9a-zA-Z_%.]+]], align 16
52 // CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG14]], align 16
53 // CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG17]], align 16
54 // CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_add(float vector[4], float vector[4])(<4 x float> [[REG20]], <4 x float> [[REG21]])
55 // CHECK-NEXT: ret <4 x float> [[REG22]]
56 
57 // CHECK: define available_externally <2 x double> @_mm_hadd_pd(<2 x double> [[REG23:[0-9a-zA-Z_%.]+]], <2 x double> [[REG24:[0-9a-zA-Z_%.]+]])
58 // CHECK: store <2 x double> [[REG23]], <2 x double>* [[REG25:[0-9a-zA-Z_%.]+]], align 16
59 // CHECK-NEXT: store <2 x double> [[REG24]], <2 x double>* [[REG26:[0-9a-zA-Z_%.]+]], align 16
60 // CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG25]], align 16
61 // CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG26]], align 16
62 // CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergeh(double vector[2], double vector[2])(<2 x double> [[REG27]], <2 x double> [[REG28]])
63 // CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG25]], align 16
64 // CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG26]], align 16
65 // CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergel(double vector[2], double vector[2])(<2 x double> [[REG30]], <2 x double> [[REG31]])
66 // CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_add(double vector[2], double vector[2])(<2 x double> [[REG29]], <2 x double> [[REG32]])
67 // CHECK-NEXT: ret <2 x double> [[REG33]]
68 
69 // CHECK: define available_externally <4 x float> @_mm_hadd_ps(<4 x float> [[REG34:[0-9a-zA-Z_%.]+]], <4 x float> [[REG35:[0-9a-zA-Z_%.]+]])
70 // CHECK: store <4 x float> [[REG34]], <4 x float>* [[REG36:[0-9a-zA-Z_%.]+]], align 16
71 // CHECK-NEXT: store <4 x float> [[REG35]], <4 x float>* [[REG37:[0-9a-zA-Z_%.]+]], align 16
72 // CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, <16 x i8>* [[REG38:[0-9a-zA-Z_%.]+]], align 16
73 // CHECK-NEXT: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, <16 x i8>* [[REG39:[0-9a-zA-Z_%.]+]], align 16
74 // CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG36]], align 16
75 // CHECK-NEXT: [[REG41:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG37]], align 16
76 // CHECK-NEXT: [[REG42:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG38:[0-9a-zA-Z_%.]+]], align 16
77 // CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG40]], <4 x float> [[REG41]], <16 x i8> [[REG42]])
78 // CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG36]], align 16
79 // CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG37]], align 16
80 // CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG39:[0-9a-zA-Z_%.]+]], align 16
81 // CHECK-NEXT: [[REG47:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG44]], <4 x float> [[REG45]], <16 x i8> [[REG46]])
82 // CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_add(float vector[4], float vector[4])(<4 x float> [[REG43]], <4 x float> [[REG47]])
83 // CHECK-NEXT: ret <4 x float> [[REG48]]
84 
85 // CHECK: define available_externally <2 x double> @_mm_hsub_pd(<2 x double> [[REG49:[0-9a-zA-Z_%.]+]], <2 x double> [[REG50:[0-9a-zA-Z_%.]+]])
86 // CHECK: store <2 x double> [[REG49]], <2 x double>* [[REG51:[0-9a-zA-Z_%.]+]], align 16
87 // CHECK-NEXT: store <2 x double> [[REG50]], <2 x double>* [[REG52:[0-9a-zA-Z_%.]+]], align 16
88 // CHECK-NEXT: [[REG53:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG51]], align 16
89 // CHECK-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG52]], align 16
90 // CHECK-NEXT: [[REG55:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergeh(double vector[2], double vector[2])(<2 x double> [[REG53]], <2 x double> [[REG54]])
91 // CHECK-NEXT: [[REG56:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG51]], align 16
92 // CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG52]], align 16
93 // CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergel(double vector[2], double vector[2])(<2 x double> [[REG56]], <2 x double> [[REG57]])
94 // CHECK-NEXT: [[REG59:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_sub(double vector[2], double vector[2])(<2 x double> [[REG55]], <2 x double> [[REG58]])
95 // CHECK-NEXT: ret <2 x double> [[REG59]]
96 
97 // CHECK: define available_externally <4 x float> @_mm_hsub_ps(<4 x float> [[REG60:[0-9a-zA-Z_%.]+]], <4 x float> [[REG61:[0-9a-zA-Z_%.]+]])
98 // CHECK: store <4 x float> [[REG60]], <4 x float>* [[REG62:[0-9a-zA-Z_%.]+]], align 16
99 // CHECK-NEXT: store <4 x float> [[REG61]], <4 x float>* [[REG63:[0-9a-zA-Z_%.]+]], align 16
100 // CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, <16 x i8>* [[REG64:[0-9a-zA-Z_%.]+]], align 16
101 // CHECK-NEXT: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, <16 x i8>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
102 // CHECK-NEXT: [[REG66:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG62]], align 16
103 // CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
104 // CHECK-NEXT: [[REG68:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG64]], align 16
105 // CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG66]], <4 x float> [[REG67]], <16 x i8> [[REG68]])
106 // CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG62]], align 16
107 // CHECK-NEXT: [[REG71:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
108 // CHECK-NEXT: [[REG72:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG65]], align 16
109 // CHECK-NEXT: [[REG73:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG70]], <4 x float> [[REG71]], <16 x i8> [[REG72]])
110 // CHECK-NEXT: [[REG74:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sub(float vector[4], float vector[4])(<4 x float> [[REG69]], <4 x float> [[REG73]])
111 // CHECK-NEXT: ret <4 x float> [[REG74]]
112 
113 // CHECK: define available_externally <2 x i64> @_mm_lddqu_si128(<2 x i64>* [[REG75:[0-9a-zA-Z_%.]+]])
114 // CHECK: store <2 x i64>* [[REG75]], <2 x i64>** [[REG76:[0-9a-zA-Z_%.]+]], align 8
115 // CHECK-NEXT: [[REG77:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG76]], align 8
116 // CHECK-NEXT: [[REG78:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64>* [[REG77]] to i32*
117 // CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vsx_ld(int, int const*)(i32 signext 0, i32* [[REG78]])
118 // CHECK-NEXT: [[REG80:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG79]] to <2 x i64>
119 // CHECK-NEXT: ret <2 x i64> [[REG80]]
120 
121 // CHECK: define available_externally <2 x double> @_mm_loaddup_pd(double* [[REG81:[0-9a-zA-Z_%.]+]])
122 // CHECK: store double* [[REG81]], double** [[REG82:[0-9a-zA-Z_%.]+]], align 8
123 // CHECK-NEXT: [[REG83:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG82]], align 8
124 // CHECK-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load double, double* [[REG83]], align 8
125 // CHECK-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double [[REG84]])
126 // CHECK-NEXT: ret <2 x double> [[REG85]]
127 
128 // CHECK: define available_externally <2 x double> @_mm_movedup_pd(<2 x double> [[REG86:[0-9a-zA-Z_%.]+]])
129 // CHECK: store <2 x double> [[REG86]], <2 x double>* [[REG87:[0-9a-zA-Z_%.]+]], align 16
130 // CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG87]], align 16
131 // CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG87]], align 16
132 // CHECK-NEXT: [[REG90:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_shuffle_pd(<2 x double> [[REG88]], <2 x double> [[REG89]], i32 signext 0)
133 // CHECK-NEXT: ret <2 x double> [[REG90]]
134 
135 // CHECK: define available_externally <4 x float> @_mm_movehdup_ps(<4 x float> [[REG91:[0-9a-zA-Z_%.]+]])
136 // CHECK: store <4 x float> [[REG91]], <4 x float>* [[REG92:[0-9a-zA-Z_%.]+]], align 16
137 // CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
138 // CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG93]] to <4 x i32>
139 // CHECK-NEXT: [[REG95:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
140 // CHECK-NEXT: [[REG96:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG95]] to <4 x i32>
141 // CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeo(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG94]], <4 x i32> [[REG96]])
142 // CHECK-NEXT: [[REG98:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG97]] to <4 x float>
143 // CHECK-NEXT: ret <4 x float> [[REG98]]
144 
145 // CHECK: define available_externally <4 x float> @_mm_moveldup_ps(<4 x float> [[REG99:[0-9a-zA-Z_%.]+]])
146 // CHECK: store <4 x float> [[REG99]], <4 x float>* [[REG100:[0-9a-zA-Z_%.]+]], align 16
147 // CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG100]], align 16
148 // CHECK-NEXT: [[REG102:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG101]] to <4 x i32>
149 // CHECK-NEXT: [[REG103:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG100]], align 16
150 // CHECK-NEXT: [[REG104:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG103]] to <4 x i32>
151 // CHECK-NEXT: [[REG105:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergee(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG102]], <4 x i32> [[REG104]])
152 // CHECK-NEXT: [[REG106:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG105]] to <4 x float>
153 // CHECK-NEXT: ret <4 x float> [[REG106]]
154