Lines Matching refs:va

280                 const float* va = kernel_tm.channel(i / 8);  in conv_im2col_sgemm_neon()  local
443 "=r"(va) // %9 in conv_im2col_sgemm_neon()
453 "9"(va), in conv_im2col_sgemm_neon()
472 sum0[n] += va[0] * vb[n]; in conv_im2col_sgemm_neon()
473 sum1[n] += va[1] * vb[n]; in conv_im2col_sgemm_neon()
474 sum2[n] += va[2] * vb[n]; in conv_im2col_sgemm_neon()
475 sum3[n] += va[3] * vb[n]; in conv_im2col_sgemm_neon()
476 sum4[n] += va[4] * vb[n]; in conv_im2col_sgemm_neon()
477 sum5[n] += va[5] * vb[n]; in conv_im2col_sgemm_neon()
478 sum6[n] += va[6] * vb[n]; in conv_im2col_sgemm_neon()
479 sum7[n] += va[7] * vb[n]; in conv_im2col_sgemm_neon()
480 va += 8; in conv_im2col_sgemm_neon()
482 sum0[n] += va[0] * vb[n + 8]; in conv_im2col_sgemm_neon()
483 sum1[n] += va[1] * vb[n + 8]; in conv_im2col_sgemm_neon()
484 sum2[n] += va[2] * vb[n + 8]; in conv_im2col_sgemm_neon()
485 sum3[n] += va[3] * vb[n + 8]; in conv_im2col_sgemm_neon()
486 sum4[n] += va[4] * vb[n + 8]; in conv_im2col_sgemm_neon()
487 sum5[n] += va[5] * vb[n + 8]; in conv_im2col_sgemm_neon()
488 sum6[n] += va[6] * vb[n + 8]; in conv_im2col_sgemm_neon()
489 sum7[n] += va[7] * vb[n + 8]; in conv_im2col_sgemm_neon()
490 va += 8; in conv_im2col_sgemm_neon()
492 sum0[n] += va[0] * vb[n + 16]; in conv_im2col_sgemm_neon()
493 sum1[n] += va[1] * vb[n + 16]; in conv_im2col_sgemm_neon()
494 sum2[n] += va[2] * vb[n + 16]; in conv_im2col_sgemm_neon()
495 sum3[n] += va[3] * vb[n + 16]; in conv_im2col_sgemm_neon()
496 sum4[n] += va[4] * vb[n + 16]; in conv_im2col_sgemm_neon()
497 sum5[n] += va[5] * vb[n + 16]; in conv_im2col_sgemm_neon()
498 sum6[n] += va[6] * vb[n + 16]; in conv_im2col_sgemm_neon()
499 sum7[n] += va[7] * vb[n + 16]; in conv_im2col_sgemm_neon()
500 va += 8; in conv_im2col_sgemm_neon()
502 sum0[n] += va[0] * vb[n + 24]; in conv_im2col_sgemm_neon()
503 sum1[n] += va[1] * vb[n + 24]; in conv_im2col_sgemm_neon()
504 sum2[n] += va[2] * vb[n + 24]; in conv_im2col_sgemm_neon()
505 sum3[n] += va[3] * vb[n + 24]; in conv_im2col_sgemm_neon()
506 sum4[n] += va[4] * vb[n + 24]; in conv_im2col_sgemm_neon()
507 sum5[n] += va[5] * vb[n + 24]; in conv_im2col_sgemm_neon()
508 sum6[n] += va[6] * vb[n + 24]; in conv_im2col_sgemm_neon()
509 sum7[n] += va[7] * vb[n + 24]; in conv_im2col_sgemm_neon()
510 va += 8; in conv_im2col_sgemm_neon()
512 sum0[n] += va[0] * vb[n + 32]; in conv_im2col_sgemm_neon()
513 sum1[n] += va[1] * vb[n + 32]; in conv_im2col_sgemm_neon()
514 sum2[n] += va[2] * vb[n + 32]; in conv_im2col_sgemm_neon()
515 sum3[n] += va[3] * vb[n + 32]; in conv_im2col_sgemm_neon()
516 sum4[n] += va[4] * vb[n + 32]; in conv_im2col_sgemm_neon()
517 sum5[n] += va[5] * vb[n + 32]; in conv_im2col_sgemm_neon()
518 sum6[n] += va[6] * vb[n + 32]; in conv_im2col_sgemm_neon()
519 sum7[n] += va[7] * vb[n + 32]; in conv_im2col_sgemm_neon()
520 va += 8; in conv_im2col_sgemm_neon()
522 sum0[n] += va[0] * vb[n + 40]; in conv_im2col_sgemm_neon()
523 sum1[n] += va[1] * vb[n + 40]; in conv_im2col_sgemm_neon()
524 sum2[n] += va[2] * vb[n + 40]; in conv_im2col_sgemm_neon()
525 sum3[n] += va[3] * vb[n + 40]; in conv_im2col_sgemm_neon()
526 sum4[n] += va[4] * vb[n + 40]; in conv_im2col_sgemm_neon()
527 sum5[n] += va[5] * vb[n + 40]; in conv_im2col_sgemm_neon()
528 sum6[n] += va[6] * vb[n + 40]; in conv_im2col_sgemm_neon()
529 sum7[n] += va[7] * vb[n + 40]; in conv_im2col_sgemm_neon()
530 va += 8; in conv_im2col_sgemm_neon()
532 sum0[n] += va[0] * vb[n + 48]; in conv_im2col_sgemm_neon()
533 sum1[n] += va[1] * vb[n + 48]; in conv_im2col_sgemm_neon()
534 sum2[n] += va[2] * vb[n + 48]; in conv_im2col_sgemm_neon()
535 sum3[n] += va[3] * vb[n + 48]; in conv_im2col_sgemm_neon()
536 sum4[n] += va[4] * vb[n + 48]; in conv_im2col_sgemm_neon()
537 sum5[n] += va[5] * vb[n + 48]; in conv_im2col_sgemm_neon()
538 sum6[n] += va[6] * vb[n + 48]; in conv_im2col_sgemm_neon()
539 sum7[n] += va[7] * vb[n + 48]; in conv_im2col_sgemm_neon()
540 va += 8; in conv_im2col_sgemm_neon()
542 sum0[n] += va[0] * vb[n + 56]; in conv_im2col_sgemm_neon()
543 sum1[n] += va[1] * vb[n + 56]; in conv_im2col_sgemm_neon()
544 sum2[n] += va[2] * vb[n + 56]; in conv_im2col_sgemm_neon()
545 sum3[n] += va[3] * vb[n + 56]; in conv_im2col_sgemm_neon()
546 sum4[n] += va[4] * vb[n + 56]; in conv_im2col_sgemm_neon()
547 sum5[n] += va[5] * vb[n + 56]; in conv_im2col_sgemm_neon()
548 sum6[n] += va[6] * vb[n + 56]; in conv_im2col_sgemm_neon()
549 sum7[n] += va[7] * vb[n + 56]; in conv_im2col_sgemm_neon()
550 va -= 56; in conv_im2col_sgemm_neon()
553 va += 64; in conv_im2col_sgemm_neon()
561 sum0[n] += va[0] * vb[n]; in conv_im2col_sgemm_neon()
562 sum1[n] += va[1] * vb[n]; in conv_im2col_sgemm_neon()
563 sum2[n] += va[2] * vb[n]; in conv_im2col_sgemm_neon()
564 sum3[n] += va[3] * vb[n]; in conv_im2col_sgemm_neon()
565 sum4[n] += va[4] * vb[n]; in conv_im2col_sgemm_neon()
566 sum5[n] += va[5] * vb[n]; in conv_im2col_sgemm_neon()
567 sum6[n] += va[6] * vb[n]; in conv_im2col_sgemm_neon()
568 sum7[n] += va[7] * vb[n]; in conv_im2col_sgemm_neon()
571 va += 8; in conv_im2col_sgemm_neon()
600 const float* va = kernel_tm.channel(i / 8); in conv_im2col_sgemm_neon() local
691 "=r"(va) // %9 in conv_im2col_sgemm_neon()
701 "9"(va), in conv_im2col_sgemm_neon()
717 sum0 += va[0] * vb[0]; in conv_im2col_sgemm_neon()
718 sum1 += va[1] * vb[0]; in conv_im2col_sgemm_neon()
719 sum2 += va[2] * vb[0]; in conv_im2col_sgemm_neon()
720 sum3 += va[3] * vb[0]; in conv_im2col_sgemm_neon()
721 sum4 += va[4] * vb[0]; in conv_im2col_sgemm_neon()
722 sum5 += va[5] * vb[0]; in conv_im2col_sgemm_neon()
723 sum6 += va[6] * vb[0]; in conv_im2col_sgemm_neon()
724 sum7 += va[7] * vb[0]; in conv_im2col_sgemm_neon()
726 va += 8; in conv_im2col_sgemm_neon()
771 const float* va = kernel_tm.channel(i / 8 + (i % 8) / 4); in conv_im2col_sgemm_neon() local
773 const float* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_neon() local
881 "=r"(va) // %5 in conv_im2col_sgemm_neon()
887 "5"(va), in conv_im2col_sgemm_neon()
991 "=r"(va) // %5 in conv_im2col_sgemm_neon()
997 "5"(va), in conv_im2col_sgemm_neon()
1013 sum0[n] += va[0] * vb[n]; in conv_im2col_sgemm_neon()
1014 sum1[n] += va[1] * vb[n]; in conv_im2col_sgemm_neon()
1015 sum2[n] += va[2] * vb[n]; in conv_im2col_sgemm_neon()
1016 sum3[n] += va[3] * vb[n]; in conv_im2col_sgemm_neon()
1017 va += 4; in conv_im2col_sgemm_neon()
1019 sum0[n] += va[0] * vb[n + 8]; in conv_im2col_sgemm_neon()
1020 sum1[n] += va[1] * vb[n + 8]; in conv_im2col_sgemm_neon()
1021 sum2[n] += va[2] * vb[n + 8]; in conv_im2col_sgemm_neon()
1022 sum3[n] += va[3] * vb[n + 8]; in conv_im2col_sgemm_neon()
1023 va += 4; in conv_im2col_sgemm_neon()
1025 sum0[n] += va[0] * vb[n + 16]; in conv_im2col_sgemm_neon()
1026 sum1[n] += va[1] * vb[n + 16]; in conv_im2col_sgemm_neon()
1027 sum2[n] += va[2] * vb[n + 16]; in conv_im2col_sgemm_neon()
1028 sum3[n] += va[3] * vb[n + 16]; in conv_im2col_sgemm_neon()
1029 va += 4; in conv_im2col_sgemm_neon()
1031 sum0[n] += va[0] * vb[n + 24]; in conv_im2col_sgemm_neon()
1032 sum1[n] += va[1] * vb[n + 24]; in conv_im2col_sgemm_neon()
1033 sum2[n] += va[2] * vb[n + 24]; in conv_im2col_sgemm_neon()
1034 sum3[n] += va[3] * vb[n + 24]; in conv_im2col_sgemm_neon()
1035 va += 4; in conv_im2col_sgemm_neon()
1037 sum0[n] += va[0] * vb[n + 32]; in conv_im2col_sgemm_neon()
1038 sum1[n] += va[1] * vb[n + 32]; in conv_im2col_sgemm_neon()
1039 sum2[n] += va[2] * vb[n + 32]; in conv_im2col_sgemm_neon()
1040 sum3[n] += va[3] * vb[n + 32]; in conv_im2col_sgemm_neon()
1041 va += 4; in conv_im2col_sgemm_neon()
1043 sum0[n] += va[0] * vb[n + 40]; in conv_im2col_sgemm_neon()
1044 sum1[n] += va[1] * vb[n + 40]; in conv_im2col_sgemm_neon()
1045 sum2[n] += va[2] * vb[n + 40]; in conv_im2col_sgemm_neon()
1046 sum3[n] += va[3] * vb[n + 40]; in conv_im2col_sgemm_neon()
1047 va += 4; in conv_im2col_sgemm_neon()
1049 sum0[n] += va[0] * vb[n + 48]; in conv_im2col_sgemm_neon()
1050 sum1[n] += va[1] * vb[n + 48]; in conv_im2col_sgemm_neon()
1051 sum2[n] += va[2] * vb[n + 48]; in conv_im2col_sgemm_neon()
1052 sum3[n] += va[3] * vb[n + 48]; in conv_im2col_sgemm_neon()
1053 va += 4; in conv_im2col_sgemm_neon()
1055 sum0[n] += va[0] * vb[n + 56]; in conv_im2col_sgemm_neon()
1056 sum1[n] += va[1] * vb[n + 56]; in conv_im2col_sgemm_neon()
1057 sum2[n] += va[2] * vb[n + 56]; in conv_im2col_sgemm_neon()
1058 sum3[n] += va[3] * vb[n + 56]; in conv_im2col_sgemm_neon()
1059 va -= 28; in conv_im2col_sgemm_neon()
1062 va += 32; in conv_im2col_sgemm_neon()
1070 sum0[n] += va[0] * vb[n]; in conv_im2col_sgemm_neon()
1071 sum1[n] += va[1] * vb[n]; in conv_im2col_sgemm_neon()
1072 sum2[n] += va[2] * vb[n]; in conv_im2col_sgemm_neon()
1073 sum3[n] += va[3] * vb[n]; in conv_im2col_sgemm_neon()
1076 va += 4; in conv_im2col_sgemm_neon()
1098 const float* va = kernel_tm.channel(i / 8 + (i % 8) / 4); in conv_im2col_sgemm_neon() local
1100 const float* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_neon() local
1169 "=r"(va) // %5 in conv_im2col_sgemm_neon()
1175 "5"(va), in conv_im2col_sgemm_neon()
1240 "=r"(va) // %5 in conv_im2col_sgemm_neon()
1246 "5"(va), in conv_im2col_sgemm_neon()
1259 sum0 += va[0] * vb[0]; in conv_im2col_sgemm_neon()
1260 sum1 += va[1] * vb[0]; in conv_im2col_sgemm_neon()
1261 sum2 += va[2] * vb[0]; in conv_im2col_sgemm_neon()
1262 sum3 += va[3] * vb[0]; in conv_im2col_sgemm_neon()
1264 va += 4; in conv_im2col_sgemm_neon()
1294 const float* va = kernel_tm.channel(i / 8 + (i % 8) / 4 + i % 4); in conv_im2col_sgemm_neon() local
1296 const float* va = kernel_tm.channel(i / 4 + i % 4); in conv_im2col_sgemm_neon() local
1359 "=r"(va) // %2 in conv_im2col_sgemm_neon()
1362 "2"(va), in conv_im2col_sgemm_neon()
1423 "=r"(va) // %2 in conv_im2col_sgemm_neon()
1426 "2"(va), in conv_im2col_sgemm_neon()
1439 sum[n] += va[0] * vb[n]; in conv_im2col_sgemm_neon()
1440 sum[n] += va[1] * vb[n + 8]; in conv_im2col_sgemm_neon()
1441 sum[n] += va[2] * vb[n + 16]; in conv_im2col_sgemm_neon()
1442 sum[n] += va[3] * vb[n + 24]; in conv_im2col_sgemm_neon()
1443 sum[n] += va[4] * vb[n + 32]; in conv_im2col_sgemm_neon()
1444 sum[n] += va[5] * vb[n + 40]; in conv_im2col_sgemm_neon()
1445 sum[n] += va[6] * vb[n + 48]; in conv_im2col_sgemm_neon()
1446 sum[n] += va[7] * vb[n + 56]; in conv_im2col_sgemm_neon()
1449 va += 8; in conv_im2col_sgemm_neon()
1457 sum[n] += va[0] * vb[n]; in conv_im2col_sgemm_neon()
1460 va += 1; in conv_im2col_sgemm_neon()
1476 const float* va = kernel_tm.channel(i / 8 + (i % 8) / 4 + i % 4); in conv_im2col_sgemm_neon() local
1478 const float* va = kernel_tm.channel(i / 4 + i % 4); in conv_im2col_sgemm_neon() local
1490 float32x4_t _k0 = vld1q_f32(va); in conv_im2col_sgemm_neon()
1491 va += 4; in conv_im2col_sgemm_neon()
1511 sum0 += va[0] * vb[0]; in conv_im2col_sgemm_neon()
1513 va += 1; in conv_im2col_sgemm_neon()