Lines Matching refs:BLOCK

87 #define BLOCK 16  macro
92 #define BLOCKSQ (BLOCK * BLOCK)
94 #define number_blocks(x) (((x) + BLOCK - 1) >> BLOCKSHIFT)
101 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT; in reserveSpace()
128 int numberBlocks = (numberRows + BLOCK - 1) >> BLOCKSHIFT; in space()
527 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT; in bNumber()
558 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT; in factorizePart2()
562 int nRound = numberRows_ & (~(BLOCK - 1)); in factorizePart2()
565 nRound -= BLOCK; in factorizePart2()
571 if (sizeLastBlock != BLOCK) { in factorizePart2()
577 put -= (BLOCK - sizeLastBlock) * (BLOCK + 1); in factorizePart2()
580 put -= BLOCK; in factorizePart2()
597 for (; n > 0; n -= BLOCK) { in factorizePart2()
606 putLast = BLOCKSQ - BLOCK + sizeLastBlock; in factorizePart2()
608 for (iColumn = n - 1; iColumn >= n - BLOCK; iColumn--) { in factorizePart2()
615 putLast -= BLOCK - sizeLastBlock; in factorizePart2()
621 int last = CoinMax(j - BLOCK, iColumn); in factorizePart2()
626 if (j - BLOCK < iColumn) { in factorizePart2()
630 j -= BLOCK; in factorizePart2()
633 put -= BLOCK; in factorizePart2()
673 if (n <= BLOCK) { in ClpCholeskyCfactor()
696 if (nThis <= BLOCK && nLeft <= BLOCK) { in ClpCholeskyCtriRec()
730 if (nTri <= BLOCK && nDo <= BLOCK) { in ClpCholeskyCrecTri()
768 if (nDo <= BLOCK && nUnder <= BLOCK && nUnderK <= BLOCK) { in ClpCholeskyCrecRec()
769 assert(nDo == BLOCK && nUnder == BLOCK); in ClpCholeskyCrecRec()
812 aa = a - BLOCK; in ClpCholeskyCfactorLeaf()
816 aa += BLOCK; in ClpCholeskyCfactorLeaf()
820 t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier; in ClpCholeskyCfactorLeaf()
855 t00 -= a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier; in ClpCholeskyCfactorLeaf()
883 assert(diagonal == thisStruct->diagonal_ + ict * BLOCK); in ClpCholeskyCtriRecLeaf()
888 if (nUnder == BLOCK) { in ClpCholeskyCtriRecLeaf()
889 aa = aTri - 2 * BLOCK; in ClpCholeskyCtriRecLeaf()
890 for (j = 0; j < BLOCK; j += 2) { in ClpCholeskyCtriRecLeaf()
894 aa += 2 * BLOCK; in ClpCholeskyCtriRecLeaf()
895 for (i = 0; i < BLOCK; i += 2) { in ClpCholeskyCtriRecLeaf()
897 CoinWorkDouble t00 = aUnder[i + j * BLOCK]; in ClpCholeskyCtriRecLeaf()
898 CoinWorkDouble t10 = aUnder[i + BLOCK + j * BLOCK]; in ClpCholeskyCtriRecLeaf()
899 CoinWorkDouble t01 = aUnder[i + 1 + j * BLOCK]; in ClpCholeskyCtriRecLeaf()
900 CoinWorkDouble t11 = aUnder[i + 1 + BLOCK + j * BLOCK]; in ClpCholeskyCtriRecLeaf()
904 CoinWorkDouble au0 = aUnder[i + k * BLOCK] * multiplier; in ClpCholeskyCtriRecLeaf()
905 CoinWorkDouble au1 = aUnder[i + 1 + k * BLOCK] * multiplier; in ClpCholeskyCtriRecLeaf()
906 CoinWorkDouble at0 = aTri[j + k * BLOCK]; in ClpCholeskyCtriRecLeaf()
907 CoinWorkDouble at1 = aTri[j + 1 + k * BLOCK]; in ClpCholeskyCtriRecLeaf()
914 at1 = aTri[j + 1 + j * BLOCK] * work[j]; in ClpCholeskyCtriRecLeaf()
918 aUnder[i + j * BLOCK] = t00; in ClpCholeskyCtriRecLeaf()
919 aUnder[i + 1 + j * BLOCK] = t01; in ClpCholeskyCtriRecLeaf()
920 aUnder[i + BLOCK + j * BLOCK] = t10 * temp1; in ClpCholeskyCtriRecLeaf()
921 aUnder[i + 1 + BLOCK + j * BLOCK] = t11 * temp1; in ClpCholeskyCtriRecLeaf()
926 aa = aTri - BLOCK; in ClpCholeskyCtriRecLeaf()
927 for (j = 0; j < BLOCK; j++) { in ClpCholeskyCtriRecLeaf()
930 aa += BLOCK; in ClpCholeskyCtriRecLeaf()
933 CoinWorkDouble t00 = aUnder[i + j * BLOCK]; in ClpCholeskyCtriRecLeaf()
936 t00 -= aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier; in ClpCholeskyCtriRecLeaf()
938 aUnder[i + j * BLOCK] = t00 * temp1; in ClpCholeskyCtriRecLeaf()
957 assert(diagonal == thisStruct->diagonal_ + icu * BLOCK); in ClpCholeskyCrecTriLeaf()
963 if (nUnder == BLOCK) { in ClpCholeskyCrecTriLeaf()
965 aa = aTri - 2 * BLOCK; in ClpCholeskyCrecTriLeaf()
966 for (j = 0; j < BLOCK; j += 2) { in ClpCholeskyCrecTriLeaf()
968 aa += 2 * BLOCK; in ClpCholeskyCrecTriLeaf()
972 t10 = aa[j + 1 + BLOCK]; in ClpCholeskyCrecTriLeaf()
973 for (k = 0; k < BLOCK; ++k) { in ClpCholeskyCrecTriLeaf()
975 CoinWorkDouble a0 = aUnder2[k * BLOCK]; in ClpCholeskyCrecTriLeaf()
976 CoinWorkDouble a1 = aUnder2[1 + k * BLOCK]; in ClpCholeskyCrecTriLeaf()
985 aa[j + 1 + BLOCK] = t10; in ClpCholeskyCrecTriLeaf()
986 for (i = j + 2; i < BLOCK; i += 2) { in ClpCholeskyCrecTriLeaf()
988 t01 = aa[i + BLOCK]; in ClpCholeskyCrecTriLeaf()
990 t11 = aa[i + 1 + BLOCK]; in ClpCholeskyCrecTriLeaf()
991 for (k = 0; k < BLOCK; ++k) { in ClpCholeskyCrecTriLeaf()
993 CoinWorkDouble a0 = aUnder2[k * BLOCK] * multiplier; in ClpCholeskyCrecTriLeaf()
994 CoinWorkDouble a1 = aUnder2[1 + k * BLOCK] * multiplier; in ClpCholeskyCrecTriLeaf()
995 t00 -= aUnder[i + k * BLOCK] * a0; in ClpCholeskyCrecTriLeaf()
996 t01 -= aUnder[i + k * BLOCK] * a1; in ClpCholeskyCrecTriLeaf()
997 t10 -= aUnder[i + 1 + k * BLOCK] * a0; in ClpCholeskyCrecTriLeaf()
998 t11 -= aUnder[i + 1 + k * BLOCK] * a1; in ClpCholeskyCrecTriLeaf()
1001 aa[i + BLOCK] = t01; in ClpCholeskyCrecTriLeaf()
1003 aa[i + 1 + BLOCK] = t11; in ClpCholeskyCrecTriLeaf()
1008 aa = aTri - BLOCK; in ClpCholeskyCrecTriLeaf()
1010 aa += BLOCK; in ClpCholeskyCrecTriLeaf()
1013 for (k = 0; k < BLOCK; ++k) { in ClpCholeskyCrecTriLeaf()
1015 t00 -= aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK] * multiplier; in ClpCholeskyCrecTriLeaf()
1049 aa = aOther - 4 * BLOCK; in ClpCholeskyCrecRecLeaf()
1050 if (nUnder == BLOCK) { in ClpCholeskyCrecRecLeaf()
1053 aa += 2 * BLOCK; in ClpCholeskyCrecRecLeaf()
1054 for (j = 0; j < BLOCK; j += 2) { in ClpCholeskyCrecRecLeaf()
1055 aa += 2 * BLOCK; in ClpCholeskyCrecRecLeaf()
1056 for (i = 0; i < BLOCK; i += 2) { in ClpCholeskyCrecRecLeaf()
1057 CoinWorkDouble t00 = aa[i + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1058 CoinWorkDouble t10 = aa[i + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1059 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1060 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1061 for (k = 0; k < BLOCK; k++) { in ClpCholeskyCrecRecLeaf()
1063 CoinWorkDouble a00 = aUnder[i + k * BLOCK] * multiplier; in ClpCholeskyCrecRecLeaf()
1064 CoinWorkDouble a01 = aUnder[i + 1 + k * BLOCK] * multiplier; in ClpCholeskyCrecRecLeaf()
1065 t00 -= a00 * above[j + 0 + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1066 t10 -= a00 * above[j + 1 + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1067 t01 -= a01 * above[j + 0 + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1068 t11 -= a01 * above[j + 1 + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1070 aa[i + 0 * BLOCK] = t00; in ClpCholeskyCrecRecLeaf()
1071 aa[i + 1 * BLOCK] = t10; in ClpCholeskyCrecRecLeaf()
1072 aa[i + 1 + 0 * BLOCK] = t01; in ClpCholeskyCrecRecLeaf()
1073 aa[i + 1 + 1 * BLOCK] = t11; in ClpCholeskyCrecRecLeaf()
1077 for (j = 0; j < BLOCK; j += 4) { in ClpCholeskyCrecRecLeaf()
1078 aa += 4 * BLOCK; in ClpCholeskyCrecRecLeaf()
1079 for (i = 0; i < BLOCK; i += 4) { in ClpCholeskyCrecRecLeaf()
1080 CoinWorkDouble t00 = aa[i + 0 + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1081 CoinWorkDouble t10 = aa[i + 0 + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1082 CoinWorkDouble t20 = aa[i + 0 + 2 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1083 CoinWorkDouble t30 = aa[i + 0 + 3 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1084 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1085 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1086 CoinWorkDouble t21 = aa[i + 1 + 2 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1087 CoinWorkDouble t31 = aa[i + 1 + 3 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1088 CoinWorkDouble t02 = aa[i + 2 + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1089 CoinWorkDouble t12 = aa[i + 2 + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1090 CoinWorkDouble t22 = aa[i + 2 + 2 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1091 CoinWorkDouble t32 = aa[i + 2 + 3 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1092 CoinWorkDouble t03 = aa[i + 3 + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1093 CoinWorkDouble t13 = aa[i + 3 + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1094 CoinWorkDouble t23 = aa[i + 3 + 2 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1095 CoinWorkDouble t33 = aa[i + 3 + 3 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1098 for (k = 0; k < BLOCK; k++) { in ClpCholeskyCrecRecLeaf()
1120 aUnderNow += BLOCK; in ClpCholeskyCrecRecLeaf()
1121 aboveNow += BLOCK; in ClpCholeskyCrecRecLeaf()
1123 aa[i + 0 + 0 * BLOCK] = t00; in ClpCholeskyCrecRecLeaf()
1124 aa[i + 0 + 1 * BLOCK] = t10; in ClpCholeskyCrecRecLeaf()
1125 aa[i + 0 + 2 * BLOCK] = t20; in ClpCholeskyCrecRecLeaf()
1126 aa[i + 0 + 3 * BLOCK] = t30; in ClpCholeskyCrecRecLeaf()
1127 aa[i + 1 + 0 * BLOCK] = t01; in ClpCholeskyCrecRecLeaf()
1128 aa[i + 1 + 1 * BLOCK] = t11; in ClpCholeskyCrecRecLeaf()
1129 aa[i + 1 + 2 * BLOCK] = t21; in ClpCholeskyCrecRecLeaf()
1130 aa[i + 1 + 3 * BLOCK] = t31; in ClpCholeskyCrecRecLeaf()
1131 aa[i + 2 + 0 * BLOCK] = t02; in ClpCholeskyCrecRecLeaf()
1132 aa[i + 2 + 1 * BLOCK] = t12; in ClpCholeskyCrecRecLeaf()
1133 aa[i + 2 + 2 * BLOCK] = t22; in ClpCholeskyCrecRecLeaf()
1134 aa[i + 2 + 3 * BLOCK] = t32; in ClpCholeskyCrecRecLeaf()
1135 aa[i + 3 + 0 * BLOCK] = t03; in ClpCholeskyCrecRecLeaf()
1136 aa[i + 3 + 1 * BLOCK] = t13; in ClpCholeskyCrecRecLeaf()
1137 aa[i + 3 + 2 * BLOCK] = t23; in ClpCholeskyCrecRecLeaf()
1138 aa[i + 3 + 3 * BLOCK] = t33; in ClpCholeskyCrecRecLeaf()
1145 for (j = 0; j < BLOCK; j += 4) { in ClpCholeskyCrecRecLeaf()
1146 aa += 4 * BLOCK; in ClpCholeskyCrecRecLeaf()
1148 CoinWorkDouble t00 = aa[i + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1149 CoinWorkDouble t10 = aa[i + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1150 CoinWorkDouble t20 = aa[i + 2 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1151 CoinWorkDouble t30 = aa[i + 3 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1152 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1153 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1154 CoinWorkDouble t21 = aa[i + 1 + 2 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1155 CoinWorkDouble t31 = aa[i + 1 + 3 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1158 for (k = 0; k < BLOCK; k++) { in ClpCholeskyCrecRecLeaf()
1170 aUnderNow += BLOCK; in ClpCholeskyCrecRecLeaf()
1171 aboveNow += BLOCK; in ClpCholeskyCrecRecLeaf()
1173 aa[i + 0 * BLOCK] = t00; in ClpCholeskyCrecRecLeaf()
1174 aa[i + 1 * BLOCK] = t10; in ClpCholeskyCrecRecLeaf()
1175 aa[i + 2 * BLOCK] = t20; in ClpCholeskyCrecRecLeaf()
1176 aa[i + 3 * BLOCK] = t30; in ClpCholeskyCrecRecLeaf()
1177 aa[i + 1 + 0 * BLOCK] = t01; in ClpCholeskyCrecRecLeaf()
1178 aa[i + 1 + 1 * BLOCK] = t11; in ClpCholeskyCrecRecLeaf()
1179 aa[i + 1 + 2 * BLOCK] = t21; in ClpCholeskyCrecRecLeaf()
1180 aa[i + 1 + 3 * BLOCK] = t31; in ClpCholeskyCrecRecLeaf()
1183 CoinWorkDouble t0 = aa[n + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1184 CoinWorkDouble t1 = aa[n + 1 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1185 CoinWorkDouble t2 = aa[n + 2 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1186 CoinWorkDouble t3 = aa[n + 3 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1188 for (k = 0; k < BLOCK; k++) { in ClpCholeskyCrecRecLeaf()
1189 a0 = aUnder[n + k * BLOCK] * work[k]; in ClpCholeskyCrecRecLeaf()
1190 t0 -= a0 * above[j + 0 + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1191 t1 -= a0 * above[j + 1 + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1192 t2 -= a0 * above[j + 2 + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1193 t3 -= a0 * above[j + 3 + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1195 aa[n + 0 * BLOCK] = t0; in ClpCholeskyCrecRecLeaf()
1196 aa[n + 1 * BLOCK] = t1; in ClpCholeskyCrecRecLeaf()
1197 aa[n + 2 * BLOCK] = t2; in ClpCholeskyCrecRecLeaf()
1198 aa[n + 3 * BLOCK] = t3; in ClpCholeskyCrecRecLeaf()
1203 aa = aOther - BLOCK; in ClpCholeskyCrecRecLeaf()
1204 for (j = 0; j < BLOCK; j++) { in ClpCholeskyCrecRecLeaf()
1205 aa += BLOCK; in ClpCholeskyCrecRecLeaf()
1207 CoinWorkDouble t00 = aa[i + 0 * BLOCK]; in ClpCholeskyCrecRecLeaf()
1208 for (k = 0; k < BLOCK; k++) { in ClpCholeskyCrecRecLeaf()
1209 CoinWorkDouble a00 = aUnder[i + k * BLOCK] * work[k]; in ClpCholeskyCrecRecLeaf()
1210 t00 -= a00 * above[j + k * BLOCK]; in ClpCholeskyCrecRecLeaf()
1254 int numberBlocks = (numberRows_ + BLOCK - 1) >> BLOCKSHIFT; in solve()
1263 int iDo = iBlock * BLOCK; in solve()
1265 if (iDo + BLOCK > numberRows_) { in solve()
1268 nChunk = BLOCK; in solve()
1272 base += BLOCK; in solve()
1274 if (base + BLOCK > numberRows_) { in solve()
1277 nChunk = BLOCK; in solve()
1288 int lBase = (numberBlocks - 1) * BLOCK; in solve()
1292 int triBase = iBlock * BLOCK; in solve()
1295 if (iBase + BLOCK > numberRows_) { in solve()
1298 nChunk = BLOCK; in solve()
1301 iBase -= BLOCK; in solve()
1304 if (triBase + BLOCK > numberRows_) { in solve()
1307 nChunk = BLOCK; in solve()
1329 t00 -= region[k] * a[j + k * BLOCK]; in solveF1()
1340 if (n == BLOCK) { in solveF2()
1341 for (k = 0; k < BLOCK; k += 4) { in solveF2()
1346 t0 -= region[0] * a[0 + 0 * BLOCK]; in solveF2()
1347 t1 -= region[0] * a[1 + 0 * BLOCK]; in solveF2()
1348 t2 -= region[0] * a[2 + 0 * BLOCK]; in solveF2()
1349 t3 -= region[0] * a[3 + 0 * BLOCK]; in solveF2()
1351 t0 -= region[1] * a[0 + 1 * BLOCK]; in solveF2()
1352 t1 -= region[1] * a[1 + 1 * BLOCK]; in solveF2()
1353 t2 -= region[1] * a[2 + 1 * BLOCK]; in solveF2()
1354 t3 -= region[1] * a[3 + 1 * BLOCK]; in solveF2()
1356 t0 -= region[2] * a[0 + 2 * BLOCK]; in solveF2()
1357 t1 -= region[2] * a[1 + 2 * BLOCK]; in solveF2()
1358 t2 -= region[2] * a[2 + 2 * BLOCK]; in solveF2()
1359 t3 -= region[2] * a[3 + 2 * BLOCK]; in solveF2()
1361 t0 -= region[3] * a[0 + 3 * BLOCK]; in solveF2()
1362 t1 -= region[3] * a[1 + 3 * BLOCK]; in solveF2()
1363 t2 -= region[3] * a[2 + 3 * BLOCK]; in solveF2()
1364 t3 -= region[3] * a[3 + 3 * BLOCK]; in solveF2()
1366 t0 -= region[4] * a[0 + 4 * BLOCK]; in solveF2()
1367 t1 -= region[4] * a[1 + 4 * BLOCK]; in solveF2()
1368 t2 -= region[4] * a[2 + 4 * BLOCK]; in solveF2()
1369 t3 -= region[4] * a[3 + 4 * BLOCK]; in solveF2()
1371 t0 -= region[5] * a[0 + 5 * BLOCK]; in solveF2()
1372 t1 -= region[5] * a[1 + 5 * BLOCK]; in solveF2()
1373 t2 -= region[5] * a[2 + 5 * BLOCK]; in solveF2()
1374 t3 -= region[5] * a[3 + 5 * BLOCK]; in solveF2()
1376 t0 -= region[6] * a[0 + 6 * BLOCK]; in solveF2()
1377 t1 -= region[6] * a[1 + 6 * BLOCK]; in solveF2()
1378 t2 -= region[6] * a[2 + 6 * BLOCK]; in solveF2()
1379 t3 -= region[6] * a[3 + 6 * BLOCK]; in solveF2()
1381 t0 -= region[7] * a[0 + 7 * BLOCK]; in solveF2()
1382 t1 -= region[7] * a[1 + 7 * BLOCK]; in solveF2()
1383 t2 -= region[7] * a[2 + 7 * BLOCK]; in solveF2()
1384 t3 -= region[7] * a[3 + 7 * BLOCK]; in solveF2()
1385 #if BLOCK > 8 in solveF2()
1386 t0 -= region[8] * a[0 + 8 * BLOCK]; in solveF2()
1387 t1 -= region[8] * a[1 + 8 * BLOCK]; in solveF2()
1388 t2 -= region[8] * a[2 + 8 * BLOCK]; in solveF2()
1389 t3 -= region[8] * a[3 + 8 * BLOCK]; in solveF2()
1391 t0 -= region[9] * a[0 + 9 * BLOCK]; in solveF2()
1392 t1 -= region[9] * a[1 + 9 * BLOCK]; in solveF2()
1393 t2 -= region[9] * a[2 + 9 * BLOCK]; in solveF2()
1394 t3 -= region[9] * a[3 + 9 * BLOCK]; in solveF2()
1396 t0 -= region[10] * a[0 + 10 * BLOCK]; in solveF2()
1397 t1 -= region[10] * a[1 + 10 * BLOCK]; in solveF2()
1398 t2 -= region[10] * a[2 + 10 * BLOCK]; in solveF2()
1399 t3 -= region[10] * a[3 + 10 * BLOCK]; in solveF2()
1401 t0 -= region[11] * a[0 + 11 * BLOCK]; in solveF2()
1402 t1 -= region[11] * a[1 + 11 * BLOCK]; in solveF2()
1403 t2 -= region[11] * a[2 + 11 * BLOCK]; in solveF2()
1404 t3 -= region[11] * a[3 + 11 * BLOCK]; in solveF2()
1406 t0 -= region[12] * a[0 + 12 * BLOCK]; in solveF2()
1407 t1 -= region[12] * a[1 + 12 * BLOCK]; in solveF2()
1408 t2 -= region[12] * a[2 + 12 * BLOCK]; in solveF2()
1409 t3 -= region[12] * a[3 + 12 * BLOCK]; in solveF2()
1411 t0 -= region[13] * a[0 + 13 * BLOCK]; in solveF2()
1412 t1 -= region[13] * a[1 + 13 * BLOCK]; in solveF2()
1413 t2 -= region[13] * a[2 + 13 * BLOCK]; in solveF2()
1414 t3 -= region[13] * a[3 + 13 * BLOCK]; in solveF2()
1416 t0 -= region[14] * a[0 + 14 * BLOCK]; in solveF2()
1417 t1 -= region[14] * a[1 + 14 * BLOCK]; in solveF2()
1418 t2 -= region[14] * a[2 + 14 * BLOCK]; in solveF2()
1419 t3 -= region[14] * a[3 + 14 * BLOCK]; in solveF2()
1421 t0 -= region[15] * a[0 + 15 * BLOCK]; in solveF2()
1422 t1 -= region[15] * a[1 + 15 * BLOCK]; in solveF2()
1423 t2 -= region[15] * a[2 + 15 * BLOCK]; in solveF2()
1424 t3 -= region[15] * a[3 + 15 * BLOCK]; in solveF2()
1437 for (j = 0; j < BLOCK; j++) { in solveF2()
1438 t00 -= region[j] * a[k + j * BLOCK]; in solveF2()
1454 t00 -= region[k] * a[k + j * BLOCK]; in solveB1()
1465 if (n == BLOCK) { in solveB2()
1466 for (j = 0; j < BLOCK; j += 4) { in solveB2()
1471 t0 -= region2[0] * a[0 + 0 * BLOCK]; in solveB2()
1472 t1 -= region2[0] * a[0 + 1 * BLOCK]; in solveB2()
1473 t2 -= region2[0] * a[0 + 2 * BLOCK]; in solveB2()
1474 t3 -= region2[0] * a[0 + 3 * BLOCK]; in solveB2()
1476 t0 -= region2[1] * a[1 + 0 * BLOCK]; in solveB2()
1477 t1 -= region2[1] * a[1 + 1 * BLOCK]; in solveB2()
1478 t2 -= region2[1] * a[1 + 2 * BLOCK]; in solveB2()
1479 t3 -= region2[1] * a[1 + 3 * BLOCK]; in solveB2()
1481 t0 -= region2[2] * a[2 + 0 * BLOCK]; in solveB2()
1482 t1 -= region2[2] * a[2 + 1 * BLOCK]; in solveB2()
1483 t2 -= region2[2] * a[2 + 2 * BLOCK]; in solveB2()
1484 t3 -= region2[2] * a[2 + 3 * BLOCK]; in solveB2()
1486 t0 -= region2[3] * a[3 + 0 * BLOCK]; in solveB2()
1487 t1 -= region2[3] * a[3 + 1 * BLOCK]; in solveB2()
1488 t2 -= region2[3] * a[3 + 2 * BLOCK]; in solveB2()
1489 t3 -= region2[3] * a[3 + 3 * BLOCK]; in solveB2()
1491 t0 -= region2[4] * a[4 + 0 * BLOCK]; in solveB2()
1492 t1 -= region2[4] * a[4 + 1 * BLOCK]; in solveB2()
1493 t2 -= region2[4] * a[4 + 2 * BLOCK]; in solveB2()
1494 t3 -= region2[4] * a[4 + 3 * BLOCK]; in solveB2()
1496 t0 -= region2[5] * a[5 + 0 * BLOCK]; in solveB2()
1497 t1 -= region2[5] * a[5 + 1 * BLOCK]; in solveB2()
1498 t2 -= region2[5] * a[5 + 2 * BLOCK]; in solveB2()
1499 t3 -= region2[5] * a[5 + 3 * BLOCK]; in solveB2()
1501 t0 -= region2[6] * a[6 + 0 * BLOCK]; in solveB2()
1502 t1 -= region2[6] * a[6 + 1 * BLOCK]; in solveB2()
1503 t2 -= region2[6] * a[6 + 2 * BLOCK]; in solveB2()
1504 t3 -= region2[6] * a[6 + 3 * BLOCK]; in solveB2()
1506 t0 -= region2[7] * a[7 + 0 * BLOCK]; in solveB2()
1507 t1 -= region2[7] * a[7 + 1 * BLOCK]; in solveB2()
1508 t2 -= region2[7] * a[7 + 2 * BLOCK]; in solveB2()
1509 t3 -= region2[7] * a[7 + 3 * BLOCK]; in solveB2()
1510 #if BLOCK > 8 in solveB2()
1512 t0 -= region2[8] * a[8 + 0 * BLOCK]; in solveB2()
1513 t1 -= region2[8] * a[8 + 1 * BLOCK]; in solveB2()
1514 t2 -= region2[8] * a[8 + 2 * BLOCK]; in solveB2()
1515 t3 -= region2[8] * a[8 + 3 * BLOCK]; in solveB2()
1517 t0 -= region2[9] * a[9 + 0 * BLOCK]; in solveB2()
1518 t1 -= region2[9] * a[9 + 1 * BLOCK]; in solveB2()
1519 t2 -= region2[9] * a[9 + 2 * BLOCK]; in solveB2()
1520 t3 -= region2[9] * a[9 + 3 * BLOCK]; in solveB2()
1522 t0 -= region2[10] * a[10 + 0 * BLOCK]; in solveB2()
1523 t1 -= region2[10] * a[10 + 1 * BLOCK]; in solveB2()
1524 t2 -= region2[10] * a[10 + 2 * BLOCK]; in solveB2()
1525 t3 -= region2[10] * a[10 + 3 * BLOCK]; in solveB2()
1527 t0 -= region2[11] * a[11 + 0 * BLOCK]; in solveB2()
1528 t1 -= region2[11] * a[11 + 1 * BLOCK]; in solveB2()
1529 t2 -= region2[11] * a[11 + 2 * BLOCK]; in solveB2()
1530 t3 -= region2[11] * a[11 + 3 * BLOCK]; in solveB2()
1532 t0 -= region2[12] * a[12 + 0 * BLOCK]; in solveB2()
1533 t1 -= region2[12] * a[12 + 1 * BLOCK]; in solveB2()
1534 t2 -= region2[12] * a[12 + 2 * BLOCK]; in solveB2()
1535 t3 -= region2[12] * a[12 + 3 * BLOCK]; in solveB2()
1537 t0 -= region2[13] * a[13 + 0 * BLOCK]; in solveB2()
1538 t1 -= region2[13] * a[13 + 1 * BLOCK]; in solveB2()
1539 t2 -= region2[13] * a[13 + 2 * BLOCK]; in solveB2()
1540 t3 -= region2[13] * a[13 + 3 * BLOCK]; in solveB2()
1542 t0 -= region2[14] * a[14 + 0 * BLOCK]; in solveB2()
1543 t1 -= region2[14] * a[14 + 1 * BLOCK]; in solveB2()
1544 t2 -= region2[14] * a[14 + 2 * BLOCK]; in solveB2()
1545 t3 -= region2[14] * a[14 + 3 * BLOCK]; in solveB2()
1547 t0 -= region2[15] * a[15 + 0 * BLOCK]; in solveB2()
1548 t1 -= region2[15] * a[15 + 1 * BLOCK]; in solveB2()
1549 t2 -= region2[15] * a[15 + 2 * BLOCK]; in solveB2()
1550 t3 -= region2[15] * a[15 + 3 * BLOCK]; in solveB2()
1556 a += 4 * BLOCK; in solveB2()
1561 for (j = 0; j < BLOCK; j++) { in solveB2()
1564 t00 -= region2[k] * a[k + j * BLOCK]; in solveB2()