c
c     Four-index sort (permute-and-scale) kernels.
c
c     Conventions shared by every routine in this file, as established
c     by the offset arithmetic in the kernels:
c       * unsorted is addressed as a (dim1,dim2,dim3,dim4) block with
c         the FOURTH index running fastest:
c           old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
c       * a routine named transpose_PQRS_loop_WXYZ stores
c           sorted(jP,jQ,jR,jS) = factor * unsorted(j1,j2,j3,j4)
c         where sorted is again addressed with its last listed index
c         (jS) running fastest, and WXYZ is the loop nest order from
c         the outermost (W) to the innermost (Z) loop.
c
c     tce_sort_4: dispatch on the permutation (i,j,k,l).
c
c       unsorted (in)  source block, a*b*c*d elements
c       sorted   (out) destination block, a*b*c*d elements
c       a,b,c,d  (in)  extents of the four source indices
c       i,j,k,l  (in)  permutation of 1..4 selecting the kernel
c       factor   (in)  scale factor applied to every element
c
c     If (i,j,k,l) is not a permutation of 1..4 a message is printed
c     and sorted is left untouched.
c
      subroutine tce_sort_4(unsorted,sorted,a,b,c,d,
     &                      i,j,k,l,factor)
      implicit none
#include "util.fh"
#include "tce_main.fh"
      integer version
      integer a,b,c,d
      integer i,j,k,l
      double precision sorted(a*b*c*d)
      double precision unsorted(a*b*c*d)
      double precision factor
c
c     Encode the permutation as a four-digit decimal code.  The
c     encoding is only unique when every index is a single digit in
c     1..4; without the range check an invalid tuple such as
c     (0,12,3,4) would also encode to 1234 and silently run a kernel.
c     Out-of-range input is mapped to 0, which matches no kernel and
c     therefore falls through to the error branch below.
c
      if (min(i,j,k,l).lt.1 .or. max(i,j,k,l).gt.4) then
        version = 0
      else
        version = 1000*i+100*j+10*k+l
      endif
c
c transpose_1234.log: The best loop order is: 2134
c transpose_1243.log: The best loop order is: 1243
c transpose_1324.log: The best loop order is: 1234
c transpose_1342.log: The best loop order is: 1342
c transpose_1423.log: The best loop order is: 1423
c transpose_1432.log: The best loop order is: 1342
c
      if (version.eq.1234) then
        call transpose_1234_loop_2134(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.1243) then
        call transpose_1243_loop_1243(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.1324) then
        call transpose_1324_loop_1234(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.1342) then
        call transpose_1342_loop_1342(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.1423) then
        call transpose_1423_loop_1423(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.1432) then
        call transpose_1432_loop_1342(unsorted,sorted,a,b,c,d,factor)
c
c transpose_2134.log: The best loop order is: 2134
c transpose_2143.log: The best loop order is: 2143
c transpose_2314.log: The best loop order is: 2134
c transpose_2341.log: The best loop order is: 2341
c transpose_2413.log: The best loop order is: 2413
c transpose_2431.log: The best loop order is: 2341
c
      elseif (version.eq.2134) then
        call transpose_2134_loop_2134(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.2143) then
        call transpose_2143_loop_2143(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.2314) then
        call transpose_2314_loop_2134(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.2341) then
        call transpose_2341_loop_2341(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.2413) then
        call transpose_2413_loop_2413(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.2431) then
        call transpose_2431_loop_2341(unsorted,sorted,a,b,c,d,factor)
c
c transpose_3124.log: The best loop order is: 1234
c transpose_3142.log: The best loop order is: 1342
c transpose_3214.log: The best loop order is: 2134
c transpose_3241.log: The best loop order is: 2341
c transpose_3412.log: The best loop order is: 1342
c transpose_3421.log: The best loop order is: 2341
c
      elseif (version.eq.3124) then
        call transpose_3124_loop_1234(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.3142) then
        call transpose_3142_loop_1342(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.3214) then
        call transpose_3214_loop_2134(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.3241) then
        call transpose_3241_loop_2341(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.3412) then
        call transpose_3412_loop_1342(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.3421) then
        call transpose_3421_loop_2341(unsorted,sorted,a,b,c,d,factor)
c
c transpose_4123.log: The best loop order is: 1423
c transpose_4132.log: The best loop order is: 1342
c transpose_4213.log: The best loop order is: 2413
c transpose_4231.log: The best loop order is: 2341
c transpose_4312.log: The best loop order is: 1342
c transpose_4321.log: The best loop order is: 2341
c
      elseif (version.eq.4123) then
        call transpose_4123_loop_1423(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.4132) then
        call transpose_4132_loop_1342(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.4213) then
        call transpose_4213_loop_2413(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.4231) then
        call transpose_4231_loop_2341(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.4312) then
        call transpose_4312_loop_1342(unsorted,sorted,a,b,c,d,factor)
      elseif (version.eq.4321) then
        call transpose_4321_loop_2341(unsorted,sorted,a,b,c,d,factor)
c
      else
c       Not a permutation of 1..4: report it; sorted is not written.
        print*,'something is wrong...'
        print*,'tce_sort_4: unsupported permutation ',i,j,k,l
      endif

      return
      end
c
c     Permutation 4321, hand-unrolled variant (not referenced by the
c     dispatcher in this file): sorted(j4,j3,j2,j1) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both
c     arrays.  Loop nest, outer to inner: j3,j2,j4,j1, with the j4
c     and j1 loops unrolled by 4.  xdim1/xdim4 are dim1/dim4 rounded
c     down to a multiple of 4; rdim1/rdim4 are the remainders.
c
      subroutine transpose_4321_loop_3241(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer xdim1,xdim4,rdim1,rdim4
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
      rdim1=mod(dim1,4)
      rdim4=mod(dim4,4)
      xdim1=dim1-rdim1
      xdim4=dim4-rdim4
c
c     Main part: full 4 x 4 tiles in (j4,j1), followed for each j4
c     tile by the leftover j1 values (4 x 1 strips).
c
!DEC$ ivdep
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ vector always
!DEC$ loop count min(24), max(40), avg(32)
      do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
       do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ vector always
        do j4 = 1,xdim4,4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ vector always
         do j1 = 1,xdim1,4
          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
          sorted(1+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(1+j1-1))))
          sorted(2+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(2+j1-1))))
          sorted(3+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(3+j1-1))))

          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(1+j4-1)))) = factor*
     &     unsorted(1+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
          sorted(1+j1+dim1*(j2-1+dim2*(j3-1+dim3*(1+j4-1)))) = factor*
     &     unsorted(1+j4+dim4*(j3-1+dim3*(j2-1+dim2*(1+j1-1))))
          sorted(2+j1+dim1*(j2-1+dim2*(j3-1+dim3*(1+j4-1)))) = factor*
     &     unsorted(1+j4+dim4*(j3-1+dim3*(j2-1+dim2*(2+j1-1))))
          sorted(3+j1+dim1*(j2-1+dim2*(j3-1+dim3*(1+j4-1)))) = factor*
     &     unsorted(1+j4+dim4*(j3-1+dim3*(j2-1+dim2*(3+j1-1))))

          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(2+j4-1)))) = factor*
     &     unsorted(2+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
          sorted(1+j1+dim1*(j2-1+dim2*(j3-1+dim3*(2+j4-1)))) = factor*
     &     unsorted(2+j4+dim4*(j3-1+dim3*(j2-1+dim2*(1+j1-1))))
          sorted(2+j1+dim1*(j2-1+dim2*(j3-1+dim3*(2+j4-1)))) = factor*
     &     unsorted(2+j4+dim4*(j3-1+dim3*(j2-1+dim2*(2+j1-1))))
          sorted(3+j1+dim1*(j2-1+dim2*(j3-1+dim3*(2+j4-1)))) = factor*
     &     unsorted(2+j4+dim4*(j3-1+dim3*(j2-1+dim2*(3+j1-1))))

          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(3+j4-1)))) = factor*
     &     unsorted(3+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
          sorted(1+j1+dim1*(j2-1+dim2*(j3-1+dim3*(3+j4-1)))) = factor*
     &     unsorted(3+j4+dim4*(j3-1+dim3*(j2-1+dim2*(1+j1-1))))
          sorted(2+j1+dim1*(j2-1+dim2*(j3-1+dim3*(3+j4-1)))) = factor*
     &     unsorted(3+j4+dim4*(j3-1+dim3*(j2-1+dim2*(2+j1-1))))
          sorted(3+j1+dim1*(j2-1+dim2*(j3-1+dim3*(3+j4-1)))) = factor*
     &     unsorted(3+j4+dim4*(j3-1+dim3*(j2-1+dim2*(3+j1-1))))
         enddo
!DEC$ loop count min(0), max(4), avg(2)
!DEC$ vector always
         do j1 = xdim1+1,dim1,1
          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(1+j4-1)))) = factor*
     &     unsorted(1+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(2+j4-1)))) = factor*
     &     unsorted(2+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(3+j4-1)))) = factor*
     &     unsorted(3+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
         enddo
        enddo
c
c       Leftover j4 values against the 4-wide j1 tiles (1 x 4 strips).
c
!DEC$ loop count min(0), max(4), avg(2)
!DEC$ vector always
        do j4 = xdim4+1,dim4,1
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ vector always
         do j1 = 1,xdim1,4
          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
          sorted(1+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(1+j1-1))))
          sorted(2+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(2+j1-1))))
          sorted(3+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(3+j1-1))))
         enddo
        enddo
c
c       Corner: leftover j4 values against leftover j1 values.
c
!DEC$ loop count min(0), max(4), avg(2)
!DEC$ vector always
        do j4 = xdim4+1,dim4,1
!DEC$ loop count min(0), max(4), avg(2)
!DEC$ vector always
         do j1 = xdim1+1,dim1,1
          sorted(0+j1+dim1*(j2-1+dim2*(j3-1+dim3*(0+j4-1)))) = factor*
     &     unsorted(0+j4+dim4*(j3-1+dim3*(j2-1+dim2*(0+j1-1))))
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c
c     ADD AUTO-GENERATED CODE HERE
c
c
c
c     Permutation 1234 (identity): sorted(j1,j2,j3,j4) = factor *
c     unsorted(j1,j2,j3,j4), i.e. a scaled copy.
c     Loop nest, outer to inner: j2,j1,j3,j4.
c
      subroutine transpose_1234_loop_2134(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j4 = 1,dim4
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 1243: sorted(j1,j2,j4,j3) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j2,j4,j3.
c
      subroutine transpose_1243_loop_1243(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j3 = 1,dim3
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j3+dim3*(j4-1+dim4*(j2-1+dim2*(j1-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 1324: sorted(j1,j3,j2,j4) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j2,j3,j4.
c
      subroutine transpose_1324_loop_1234(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j4 = 1,dim4
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j4+dim4*(j2-1+dim2*(j3-1+dim3*(j1-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 1342: sorted(j1,j3,j4,j2) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j3,j4,j2.
c
      subroutine transpose_1342_loop_1342(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j2 = 1,dim2
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j2+dim2*(j4-1+dim4*(j3-1+dim3*(j1-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 1423: sorted(j1,j4,j2,j3) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j4,j2,j3.
c
      subroutine transpose_1423_loop_1423(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j3 = 1,dim3
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j3+dim3*(j2-1+dim2*(j4-1+dim4*(j1-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 1432: sorted(j1,j4,j3,j2) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j3,j4,j2.
c
      subroutine transpose_1432_loop_1342(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j2 = 1,dim2
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j2+dim2*(j3-1+dim3*(j4-1+dim4*(j1-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 2134: sorted(j2,j1,j3,j4) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j1,j3,j4.
c
      subroutine transpose_2134_loop_2134(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j4 = 1,dim4
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j4+dim4*(j3-1+dim3*(j1-1+dim1*(j2-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 2143: sorted(j2,j1,j4,j3) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j1,j4,j3.
c
      subroutine transpose_2143_loop_2143(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j3 = 1,dim3
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j3+dim3*(j4-1+dim4*(j1-1+dim1*(j2-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 2314: sorted(j2,j3,j1,j4) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j1,j3,j4.
c
      subroutine transpose_2314_loop_2134(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j4 = 1,dim4
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j4+dim4*(j1-1+dim1*(j3-1+dim3*(j2-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 2341: sorted(j2,j3,j4,j1) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j3,j4,j1.
c
      subroutine transpose_2341_loop_2341(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j1 = 1,dim1
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j1+dim1*(j4-1+dim4*(j3-1+dim3*(j2-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 2413: sorted(j2,j4,j1,j3) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j4,j1,j3.
c
      subroutine transpose_2413_loop_2413(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j3 = 1,dim3
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j3+dim3*(j1-1+dim1*(j4-1+dim4*(j2-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 2431: sorted(j2,j4,j3,j1) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j3,j4,j1.
c
      subroutine transpose_2431_loop_2341(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j1 = 1,dim1
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j1+dim1*(j3-1+dim3*(j4-1+dim4*(j2-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 3124: sorted(j3,j1,j2,j4) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j2,j3,j4.
c
      subroutine transpose_3124_loop_1234(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j4 = 1,dim4
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j4+dim4*(j2-1+dim2*(j1-1+dim1*(j3-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 3142: sorted(j3,j1,j4,j2) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j3,j4,j2.
c
      subroutine transpose_3142_loop_1342(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j2 = 1,dim2
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j2+dim2*(j4-1+dim4*(j1-1+dim1*(j3-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 3214: sorted(j3,j2,j1,j4) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j1,j3,j4.
c
      subroutine transpose_3214_loop_2134(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j4 = 1,dim4
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j4+dim4*(j1-1+dim1*(j2-1+dim2*(j3-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 3241: sorted(j3,j2,j4,j1) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j3,j4,j1.
c
      subroutine transpose_3241_loop_2341(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j1 = 1,dim1
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j1+dim1*(j4-1+dim4*(j2-1+dim2*(j3-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 3412: sorted(j3,j4,j1,j2) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j3,j4,j2.
c
      subroutine transpose_3412_loop_1342(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j2 = 1,dim2
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j2+dim2*(j1-1+dim1*(j4-1+dim4*(j3-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 3421: sorted(j3,j4,j2,j1) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j3,j4,j1.
c
      subroutine transpose_3421_loop_2341(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j1 = 1,dim1
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j1+dim1*(j2-1+dim2*(j4-1+dim4*(j3-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 4123: sorted(j4,j1,j2,j3) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j4,j2,j3.
c
      subroutine transpose_4123_loop_1423(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j3 = 1,dim3
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j3+dim3*(j2-1+dim2*(j1-1+dim1*(j4-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 4132: sorted(j4,j1,j3,j2) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j3,j4,j2.
c
      subroutine transpose_4132_loop_1342(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j2 = 1,dim2
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j2+dim2*(j3-1+dim3*(j1-1+dim1*(j4-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 4213: sorted(j4,j2,j1,j3) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j4,j1,j3.
c
      subroutine transpose_4213_loop_2413(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j3 = 1,dim3
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j3+dim3*(j1-1+dim1*(j2-1+dim2*(j4-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 4231: sorted(j4,j2,j3,j1) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j2,j3,j4,j1.
c
      subroutine transpose_4231_loop_2341(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j1 = 1,dim1
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j1+dim1*(j3-1+dim3*(j2-1+dim2*(j4-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 4312: sorted(j4,j3,j1,j2) = factor *
c     unsorted(j1,j2,j3,j4), last listed index fastest in both.
c     Loop nest, outer to inner: j1,j3,j4,j2.
c
      subroutine transpose_4312_loop_1342(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j1 = 1,dim1
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j2 = 1,dim2
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j2+dim2*(j1-1+dim1*(j3-1+dim3*(j4-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c
c     Permutation 4321 (full index reversal): sorted(j4,j3,j2,j1) =
c     factor * unsorted(j1,j2,j3,j4), last listed index fastest in
c     both.  Loop nest, outer to inner: j2,j3,j4,j1.
c
      subroutine transpose_4321_loop_2341(unsorted,sorted,
     &     dim1,dim2,dim3,dim4,factor)
      implicit none
      integer dim1,dim2,dim3,dim4
      integer old_offset,new_offset
      integer j1,j2,j3,j4
      double precision sorted(dim1*dim2*dim3*dim4)
      double precision unsorted(dim1*dim2*dim3*dim4)
      double precision factor
!DEC$ prefetch sorted
!DEC$ prefetch unsorted
!DEC$ ivdep
!DEC$ loop count min(24), max(40), avg(32)
      do j2 = 1,dim2
!DEC$ loop count min(24), max(40), avg(32)
       do j3 = 1,dim3
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
        do j4 = 1,dim4
!DEC$ loop count min(24), max(40), avg(32)
!DEC$ unroll(8)
!DEC$ vector always
         do j1 = 1,dim1
          old_offset = j4+dim4*(j3-1+dim3*(j2-1+dim2*(j1-1)))
          new_offset = j1+dim1*(j2-1+dim2*(j3-1+dim3*(j4-1)))
          sorted(new_offset) = unsorted(old_offset) * factor
         enddo
        enddo
       enddo
      enddo
      return
      end
c $Id$