/*
 * unaligned.h - inline functions for unaligned memory accesses
 */

#ifndef LIB_UNALIGNED_H
#define LIB_UNALIGNED_H

#include "lib_common.h"

/***** Unaligned loads and stores without endianness conversion *****/

/*
 * memcpy() is portable, and it usually gets optimized appropriately by modern
 * compilers.  I.e., each memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled
 * to a load or store instruction, not to an actual function call.
 *
 * We no longer use the "packed struct" approach, as that is nonstandard, has
 * unclear semantics, and doesn't receive enough testing
 * (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
 *
 * arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception
 * where memcpy() generates inefficient code
 * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366).  However, we no longer
 * consider that one case important enough to maintain different code for.
 * If you run into it, please just use a newer version of gcc (or use clang).
 */

#define DEFINE_UNALIGNED_TYPE(type)				\
static forceinline type						\
load_##type##_unaligned(const void *p)				\
{								\
	type v;							\
	memcpy(&v, p, sizeof(v));				\
	return v;						\
}								\
								\
static forceinline void						\
store_##type##_unaligned(type v, void *p)			\
{								\
	memcpy(p, &v, sizeof(v));				\
}

DEFINE_UNALIGNED_TYPE(u16)
DEFINE_UNALIGNED_TYPE(u32)
DEFINE_UNALIGNED_TYPE(u64)
DEFINE_UNALIGNED_TYPE(machine_word_t)

#define load_word_unaligned	load_machine_word_t_unaligned
#define store_word_unaligned	store_machine_word_t_unaligned
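
/*
 * Example usage (a sketch, not part of this header's API): copying 'n' bytes
 * a machine word at a time using the helpers defined above.  The function
 * name is hypothetical; callers must ensure 'n' bytes are readable at 'src'
 * and writable at 'dst'.
 *
 *	static forceinline void
 *	copy_word_at_a_time(u8 *dst, const u8 *src, size_t n)
 *	{
 *		while (n >= WORDBYTES) {
 *			store_word_unaligned(load_word_unaligned(src), dst);
 *			src += WORDBYTES;
 *			dst += WORDBYTES;
 *			n -= WORDBYTES;
 *		}
 *		while (n--)
 *			*dst++ = *src++;
 *	}
 */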

/***** Unaligned loads with endianness conversion *****/

static forceinline u16
get_unaligned_le16(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return le16_bswap(load_u16_unaligned(p));
	else
		return ((u16)p[1] << 8) | p[0];
}

static forceinline u16
get_unaligned_be16(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return be16_bswap(load_u16_unaligned(p));
	else
		return ((u16)p[0] << 8) | p[1];
}

static forceinline u32
get_unaligned_le32(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return le32_bswap(load_u32_unaligned(p));
	else
		return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
			((u32)p[1] << 8) | p[0];
}

static forceinline u32
get_unaligned_be32(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return be32_bswap(load_u32_unaligned(p));
	else
		return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
			((u32)p[2] << 8) | p[3];
}

static forceinline u64
get_unaligned_le64(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return le64_bswap(load_u64_unaligned(p));
	else
		return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
			((u64)p[5] << 40) | ((u64)p[4] << 32) |
			((u64)p[3] << 24) | ((u64)p[2] << 16) |
			((u64)p[1] << 8) | p[0];
}

static forceinline machine_word_t
get_unaligned_leword(const u8 *p)
{
	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
	if (WORDBITS == 32)
		return get_unaligned_le32(p);
	else
		return get_unaligned_le64(p);
}
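
/*
 * Worked example: for the bytes p[0..3] = {0x78, 0x56, 0x34, 0x12},
 * get_unaligned_le32(p) returns 0x12345678 on both little- and big-endian
 * hosts.  The two branches above differ only in code path, never in result.
 */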

/***** Unaligned stores with endianness conversion *****/

static forceinline void
put_unaligned_le16(u16 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u16_unaligned(le16_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 0);
		p[1] = (u8)(v >> 8);
	}
}

static forceinline void
put_unaligned_be16(u16 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u16_unaligned(be16_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 8);
		p[1] = (u8)(v >> 0);
	}
}

static forceinline void
put_unaligned_le32(u32 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u32_unaligned(le32_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 0);
		p[1] = (u8)(v >> 8);
		p[2] = (u8)(v >> 16);
		p[3] = (u8)(v >> 24);
	}
}

static forceinline void
put_unaligned_be32(u32 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u32_unaligned(be32_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 24);
		p[1] = (u8)(v >> 16);
		p[2] = (u8)(v >> 8);
		p[3] = (u8)(v >> 0);
	}
}

static forceinline void
put_unaligned_le64(u64 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u64_unaligned(le64_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 0);
		p[1] = (u8)(v >> 8);
		p[2] = (u8)(v >> 16);
		p[3] = (u8)(v >> 24);
		p[4] = (u8)(v >> 32);
		p[5] = (u8)(v >> 40);
		p[6] = (u8)(v >> 48);
		p[7] = (u8)(v >> 56);
	}
}

static forceinline void
put_unaligned_leword(machine_word_t v, u8 *p)
{
	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
	if (WORDBITS == 32)
		put_unaligned_le32(v, p);
	else
		put_unaligned_le64(v, p);
}
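
/*
 * Worked example: put_unaligned_le32(0x12345678, p) always yields
 * p[0..3] = {0x78, 0x56, 0x34, 0x12}, regardless of the host's endianness,
 * and get_unaligned_le32(p) then recovers 0x12345678: each store is the
 * exact inverse of the corresponding load.
 */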

/***** 24-bit loads *****/

/*
 * Given a 32-bit value that was loaded with the platform's native endianness,
 * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
 * bits contain the first 3 bytes at the memory location from which the input
 * 32-bit value was loaded, arranged as octets in a platform-dependent order.
 */
static forceinline u32
loaded_u32_to_u24(u32 v)
{
	if (CPU_IS_LITTLE_ENDIAN())
		return v & 0xFFFFFF;
	else
		return v >> 8;
}
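
/*
 * Worked example: if the bytes in memory are {0xAA, 0xBB, 0xCC, 0xDD}, a
 * native u32 load yields 0xDDCCBBAA on a little-endian host, which is masked
 * to 0x00CCBBAA; on a big-endian host it yields 0xAABBCCDD, which is shifted
 * to 0x00AABBCC.  Either way, the result contains exactly the first 3 bytes
 * {0xAA, 0xBB, 0xCC}, just in a platform-dependent arrangement.
 */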

/*
 * Load the next 3 bytes from the memory location @p into the 24 low-order bits
 * of a 32-bit value.  The order in which the 3 bytes will be arranged as octets
 * in the 24 bits is platform-dependent.  At least LOAD_U24_REQUIRED_NBYTES
 * bytes must be available at @p; note that this may be more than 3.
 */
static forceinline u32
load_u24_unaligned(const u8 *p)
{
#if UNALIGNED_ACCESS_IS_FAST
#  define LOAD_U24_REQUIRED_NBYTES 4
	return loaded_u32_to_u24(load_u32_unaligned(p));
#else
#  define LOAD_U24_REQUIRED_NBYTES 3
	if (CPU_IS_LITTLE_ENDIAN())
		return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
	else
		return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
#endif
}
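
/*
 * Example usage (a hypothetical sketch): hashing the next 3 bytes, e.g. for
 * an LZ77-style matchfinder.  'HASH3_BITS' and the multiplicative constant
 * are illustrative, not defined by this header.  The caller must guarantee
 * that at least LOAD_U24_REQUIRED_NBYTES bytes are readable at 'p'.
 *
 *	#define HASH3_BITS	15
 *
 *	static forceinline u32
 *	hash_3_bytes(const u8 *p)
 *	{
 *		return (load_u24_unaligned(p) * 0x9E3779B1) >>
 *			(32 - HASH3_BITS);
 *	}
 *
 * Since the 3 bytes' arrangement within the 24 bits is platform-dependent,
 * such hash values are consistent within a run but not across platforms.
 */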
227 
228 #endif /* LIB_UNALIGNED_H */
229