;; This code used to be expanded through interesting expansions in
;; the machine description, compiled from this code:
;;
;; #ifdef L_mulsi3
;; long __Mul (unsigned long a, unsigned long b) __attribute__ ((__const__));
;;
;; /* This must be compiled with the -mexpand-mul flag, to synthesize the
;;    multiplication from the mstep instructions.  The check for
;;    smaller-size multiplication pays off in the order of .5-10%;
;;    estimated median 1%, depending on application.
;;    FIXME: It can be further optimized if we go to assembler code, as
;;    gcc 2.7.2 adds a few unnecessary instructions and does not put the
;;    basic blocks in optimal order.  */
;; long
;; __Mul (unsigned long a, unsigned long b)
;; {
;; #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
;;   /* In case other code is compiled without -march=v10, they will
;;      contain calls to __Mul, regardless of flags at link-time.  The
;;      "else"-code below will work, but is unnecessarily slow.  This
;;      sometimes cuts a few minutes off from simulation time by just
;;      returning a "mulu.d".  */
;;   return a * b;
;; #else
;;   unsigned long min;
;;
;;   /* Get minimum via the bound insn.  */
;;   min = a < b ? a : b;
;;
;;   /* Can we omit computation of the high part?  */
;;   if (min > 65535)
;;     /* No.  Perform full multiplication.  */
;;     return a * b;
;;   else
;;     {
;;       /* Check if both operands are within 16 bits.  */
;;       unsigned long max;
;;
;;       /* Get maximum, by knowing the minimum.
;;          This will partition a and b into max and min.
;;          This is not currently something GCC understands,
;;          so do this trick by asm.  */
;;       __asm__ ("xor %1,%0\n\txor %2,%0"
;;                : "=r" (max)
;;                :  "r" (b), "r" (a), "0" (min));
;;
;;       if (max > 65535)
;;         /* Make GCC understand that only the low part of "min" will be
;;            used.  */
;;         return max * (unsigned short) min;
;;       else
;;         /* Only the low parts of both operands are necessary.  */
;;         return ((unsigned short) max) * (unsigned short) min;
;;     }
;; #endif /* not __CRIS_arch_version >= 10 */
;; }
;; #endif /* L_mulsi3 */
;;
;; That approach was abandoned since the caveats outweighed the
;; benefits.  The expand-multiplication machinery is also removed, so you
;; can't do this anymore.
;;
;; For doubters of there being any benefits, some were: insensitivity to:
;; - ABI changes (mostly for experimentation)
;; - assembler syntax differences (mostly debug format).
;; - insn scheduling issues.
;; Most ABI experiments will presumably happen with arches with mul insns,
;; so that argument doesn't really hold anymore, and it's unlikely there
;; being new arch variants needing insn scheduling and not having mul
;; insns.

;; ELF and a.out have different syntax for local labels: the "wrong"
;; one may not be omitted from the object.
#undef L
#ifdef __AOUT__
# define L(x) x
#else
# define L(x) .x
#endif

;; ___Mul: 32-bit multiplication helper, the hand-kept assembly form of
;; the C function __Mul quoted at the top of this file.
;; NOTE(review): judging from the v10 path below (a single
;; "mulu.d $r11,$r10" standing in for "return a * b"), the operands
;; arrive in $r10 and $r11 and the product is returned in $r10.  The
;; generic path also overwrites $r9, $r12 and $r13 -- confirm against
;; the ABI that these may be clobbered.
	.global ___Mul
	.type ___Mul,@function
___Mul:
#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
;; Can't have the mulu.d last on a cache-line (in the delay-slot of the
;; "ret"), due to hardware bug.  See documentation for -mmul-bug-workaround.
;; Not worthwhile to conditionalize here.
;; NOTE(review): the fill value 0x050f is presumably a nop encoding used
;; as alignment padding -- confirm.
	.p2alignw 2,0x050f
	mulu.d $r11,$r10
	ret
	nop
#else
;; Generic path, following the "else" branch of the C code above.
;; Keep a copy of the first operand in $r12 and compute the minimum of
;; both operands into $r9 ("Get minimum via the bound insn").
	move.d $r10,$r12
	move.d $r11,$r9
	bound.d $r12,$r9
;; min <= 65535: the high part can be omitted, use a shorter path.
	cmpu.w 65535,$r9
	bls L(L3)
;; NOTE(review): presumably this insn sits in the branch delay slot and
;; is executed whether or not the branch is taken -- confirm.
	move.d $r12,$r13

;; Full multiplication: both operands need more than 16 bits.
;; NOTE(review): the comments below assume each mstep shifts its
;; destination left one bit and adds the source when the N flag is set,
;; so that 16 steps starting from (x << 16) leave (low 16 bits of x)
;; times the source in the destination -- TODO confirm with the CRIS
;; manual.
;; First partial product into $r13: low half of the $r10 operand times
;; low half of the $r11 operand.
	movu.w $r11,$r9
	lslq 16,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
	mstep $r9,$r13
;; Second partial product into $r10: the low word of $r10 is cleared so
;; only its high half takes part; test.d presumably refreshes the flags
;; for the first mstep.
	clear.w $r10
	test.d $r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
;; Third partial product into $r9: high half of the $r11 operand times
;; the low half of the saved first operand in $r12.
	movu.w $r12,$r12
	move.d $r11,$r9
	clear.w $r9
	test.d $r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
	mstep $r12,$r9
;; Sum the low words of the two cross products, move that sum to the
;; upper half, and add the low-by-low product from $r13.  The final add
;; is placed after the ret (delay slot, as with the v10 path above).
	add.w $r9,$r10
	lslq 16,$r10
	ret
	add.d $r13,$r10

L(L3):
;; Here $r9 = min <= 65535.  Recover the maximum with the xor trick from
;; the C code: min ^ b ^ a, using $r11 and the operand copy in $r12.
	move.d $r9,$r10
	xor $r11,$r10
	xor $r12,$r10
;; max <= 65535 as well: a single 16-step product is enough.
	cmpu.w 65535,$r10
	bls L(L5)
;; NOTE(review): presumably in the branch delay slot, so $r13 is also
;; written when the branch is taken -- confirm.
	movu.w $r9,$r13

;; max needs more than 16 bits, min fits in 16 bits:
;; "return max * (unsigned short) min".
;; The second movu.w repeats the extension already done just above.
	movu.w $r13,$r13
;; $r9: low half of max times min.
	move.d $r10,$r9
	lslq 16,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
	mstep $r13,$r9
;; $r10: high half of max (low word cleared) times min.
	clear.w $r10
	test.d $r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
	mstep $r13,$r10
;; Move the high-half product into the upper 16 bits and add the
;; low-half product; the add is placed after the ret.
	lslq 16,$r10
	ret
	add.d $r9,$r10

L(L5):
;; Both operands fit in 16 bits:
;; "return ((unsigned short) max) * (unsigned short) min".
;; $r9 holds min and $r10 holds max on entry to this label.
	movu.w $r9,$r9
	lslq 16,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
	mstep $r9,$r10
;; The sixteenth and last step is placed after the ret.
	ret
	mstep $r9,$r10
#endif
;; End-of-function label, used only to compute the symbol size.
L(Lfe1):
	.size ___Mul,L(Lfe1)-___Mul