|
475 | 475 | ret |
476 | 476 | %endmacro |
477 | 477 |
|
;;;;;
; mbin_dispatch_init8_hygon parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_2 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
; 7-> AVX2 Update/07 opt func
; 8-> AVX512 Update/10 opt func
;
; Emits %1_dispatch_init, which probes CPUID/XGETBV, picks one of the
; functions above and stores its address in [%1_dispatched].
; rsi, rax, rbx, rcx, rdx and rdi are pushed on entry and popped on exit.
;
; With special case:
; - On Hygon 1/2/3 platforms selection stops at the AVX (%4) function,
;   even when the CPU reports AVX2.
;
; Register roles inside the routine:
;   rsi - address of the currently selected function
;   ebx - copy of cpuid(1).ecx until the AVX test, then cpuid scratch
;   edi - copy of xgetbv eax (XCR0 low dword), reused by the AVX512 test
;   rax - scratch; holds the candidate function address before each cmove
;;;;;
%macro mbin_dispatch_init8_hygon 8
	section .text
	%1_dispatch_init:
		push	rsi
		push	rax
		push	rbx
		push	rcx
		push	rdx
		push	rdi
		lea	rsi, [%2 WRT_OPT]	; start from the base function

		;; cpuid leaf 1: feature bits are returned in ecx
		mov	eax, 1
		cpuid
		mov	ebx, ecx		; keep cpuid1.ecx, ecx is reused below
		test	ecx, FLAG_CPUID1_ECX_SSE4_2
		jz	_%1_init_done		; no SSE4_2: stay on base function
		lea	rsi, [%3 WRT_OPT]	; SSE4_2 present: 00/01 opt

		;; XMM/YMM state must be enabled (xgetbv) before AVX is usable
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		jz	_%1_init_done
		xor	ecx, ecx
		xgetbv				; xcr -> edx:eax
		mov	edi, eax		; keep xgetbv.eax for the AVX512 test

		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_init_done
		test	ebx, FLAG_CPUID1_ECX_AVX
		jz	_%1_init_done
		lea	rsi, [%4 WRT_OPT]	; AVX/02 opt

		;; Hygon platform check: keep the AVX opt on Hygon 1/2/3 for
		;; performance, even if they have the ability to use the AVX2 opt.
		;; cpuid leaf 0 returns the vendor ID in ebx, edx, ecx.
		xor	eax, eax
		cpuid
		cmp	ebx, FLAG_CPUID0_EBX_HYGON
		jne	_%1_check_avx2		; not Hygon: normal procedure
		cmp	edx, FLAG_CPUID0_EDX_HYGON
		jne	_%1_check_avx2		; not Hygon: normal procedure
		cmp	ecx, FLAG_CPUID0_ECX_HYGON
		jne	_%1_check_avx2		; not Hygon: normal procedure

		;; Vendor ID matched in all three registers: Hygon confirmed.
		;; Compare cpuid(1).eax, masked with FLAG_CPUID1_EAX_STEP_MASK,
		;; against the Hygon 1/2/3 signatures.
		mov	eax, 1
		cpuid
		and	eax, FLAG_CPUID1_EAX_STEP_MASK
		cmp	eax, FLAG_CPUID1_EAX_HYGON1
		je	_%1_init_done		; Hygon 1: keep AVX opt chosen above
		cmp	eax, FLAG_CPUID1_EAX_HYGON2
		je	_%1_init_done		; Hygon 2: keep AVX opt chosen above
		cmp	eax, FLAG_CPUID1_EAX_HYGON3
		je	_%1_init_done		; Hygon 3: keep AVX opt chosen above
		;; Any other Hygon part falls through to the normal procedure

	_%1_check_avx2:
		;; cpuid leaf 7, subleaf 0: AVX2 / AVX512 feature bits
		xor	ecx, ecx
		mov	eax, 7
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		jz	_%1_init_done		; no AVX2 possible
		lea	rsi, [%5 WRT_OPT]	; AVX2/04 opt func

		;; AVX512 needs the ZMM/opmask state bits from xgetbv
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_check_avx2_g2	; no AVX512 possible

		lea	rax, [%6 WRT_OPT]	; candidate: AVX512/06 opt
		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmove	rsi, rax		; taken only if every G1 bit is set

		;; The G2 test looks only at cpuid7.ecx; it does not depend on
		;; the outcome of the G1 test above.
		lea	rax, [%8 WRT_OPT]	; candidate: AVX512/10 opt
		and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
		cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
		cmove	rsi, rax		; taken only if every G2 bit is set
		jmp	_%1_init_done

	_%1_check_avx2_g2:
		;; Reached only when the ZMM/opmask state is not enabled
		lea	rax, [%7 WRT_OPT]	; candidate: AVX2/07 opt
		and	ecx, FLAGS_CPUID7_ECX_AVX2_G2
		cmp	ecx, FLAGS_CPUID7_ECX_AVX2_G2
		cmove	rsi, rax		; taken only if every AVX2 G2 bit is set

	_%1_init_done:
		pop	rdi
		pop	rdx
		pop	rcx
		pop	rbx
		pop	rax
		mov	[%1_dispatched], rsi	; publish the selection
		pop	rsi
		ret
%endmacro
| 601 | + |
478 | 602 | %endif ; ifndef _MULTIBINARY_ASM_ |
0 commit comments