Skip to content

Commit d36de97

Browse files
MaodiMapablodelara
authored and committed
erasure_code: add special dispatch case
Using the highest-level instruction set may not yield the best performance on certain platforms. E.g., using the AVX implementation for EC updating instead of the AVX2 implementation can be faster on Hygon 1/2/3 platforms. This commit identifies Hygon platforms and uses a special dispatch case for ec_encode_data_update to choose a specific instruction-set implementation. Signed-off-by: Maodi Ma <[email protected]>
1 parent a439f0d commit d36de97

File tree

3 files changed

+133
-2
lines changed

3 files changed

+133
-2
lines changed

erasure_code/ec_multibinary.asm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ mbin_interface ec_init_tables
7777

7878
mbin_dispatch_init5 gf_vect_mul, gf_vect_mul_base, gf_vect_mul_sse, gf_vect_mul_avx, gf_vect_mul_avx
7979
mbin_dispatch_init8 ec_encode_data, ec_encode_data_base, ec_encode_data_sse, ec_encode_data_avx, ec_encode_data_avx2, ec_encode_data_avx512, ec_encode_data_avx2_gfni, ec_encode_data_avx512_gfni
80-
mbin_dispatch_init8 ec_encode_data_update, ec_encode_data_update_base, ec_encode_data_update_sse, ec_encode_data_update_avx, ec_encode_data_update_avx2, ec_encode_data_update_avx512, ec_encode_data_update_avx2_gfni, ec_encode_data_update_avx512_gfni
80+
; Hygon-aware dispatcher: on Hygon 1/2/3 the AVX implementation is kept even if AVX2 or newer is available
mbin_dispatch_init8_hygon ec_encode_data_update, ec_encode_data_update_base, ec_encode_data_update_sse, ec_encode_data_update_avx, ec_encode_data_update_avx2, ec_encode_data_update_avx512, ec_encode_data_update_avx2_gfni, ec_encode_data_update_avx512_gfni
8181
mbin_dispatch_init6 gf_vect_mad, gf_vect_mad_base, gf_vect_mad_sse, gf_vect_mad_avx, gf_vect_mad_avx2, gf_vect_mad_avx512
8282
mbin_dispatch_init6 gf_vect_dot_prod, gf_vect_dot_prod_base, gf_vect_dot_prod_sse, gf_vect_dot_prod_avx, gf_vect_dot_prod_avx2, gf_vect_dot_prod_avx512
8383
mbin_dispatch_init8 ec_init_tables, ec_init_tables_base, ec_init_tables_base, ec_init_tables_base, ec_init_tables_base, ec_init_tables_base, ec_init_tables_gfni, ec_init_tables_gfni

include/multibinary.asm

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,4 +475,128 @@
475475
ret
476476
%endmacro
477477

478+
;;;;;
479+
; mbin_dispatch_init8_hygon parameters
480+
; 1-> function name
481+
; 2-> base function
482+
; 3-> SSE4_2 or 00/01 optimized function
483+
; 4-> AVX/02 opt func
484+
; 5-> AVX2/04 opt func
485+
; 6-> AVX512/06 opt func
486+
; 7-> AVX2 Update/07 opt func
487+
; 8-> AVX512 Update/10 opt func
488+
;
489+
; With special case:
490+
; - Use the AVX function (parameter 4) on Hygon 1/2/3 platforms, even when AVX2 or newer is available
491+
;;;;;
492+
%macro mbin_dispatch_init8_hygon 8
493+
section .text
494+
%1_dispatch_init:
;; Picks an implementation for %1 and stores its address in [%1_dispatched].
;; Register roles inside this routine:
;;   rsi = address of the implementation selected so far
;;   ebx = saved CPUID.1 ECX (valid only until the vendor-ID cpuid below)
;;   edi = saved XGETBV EAX (used later for the AVX512 state check)
;; rsi, rax, rbx, rcx, rdx and rdi are pushed here and popped before ret.
495+
push rsi
496+
push rax
497+
push rbx
498+
push rcx
499+
push rdx
500+
push rdi
501+
lea rsi, [%2 WRT_OPT] ; Default - use base function
502+
503+
mov eax, 1 ; CPUID leaf 1
504+
cpuid
505+
mov ebx, ecx ; save cpuid1.ecx
506+
test ecx, FLAG_CPUID1_ECX_SSE4_2
507+
je _%1_init_done ; Use base function if no SSE4_2
508+
lea rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
509+
510+
;; Test for XMM_YMM support/AVX
511+
test ecx, FLAG_CPUID1_ECX_OSXSAVE
512+
je _%1_init_done
513+
xor ecx, ecx
514+
xgetbv ; xcr -> edx:eax
515+
mov edi, eax ; save xgetbv.eax for the AVX512 state check below
516+
517+
and eax, FLAG_XGETBV_EAX_XMM_YMM
518+
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
519+
jne _%1_init_done
520+
test ebx, FLAG_CPUID1_ECX_AVX
521+
je _%1_init_done
522+
lea rsi, [%4 WRT_OPT] ; AVX/02 opt
523+
524+
;; Hygon platform check: Use AVX opt on Hygon 1/2/3 for performance,
525+
;; even if they have the ability to use the AVX2 opt
526+
xor eax, eax ; CPUID leaf 0; overwrites the saved cpuid1.ecx in ebx (no longer needed)
527+
cpuid
528+
mov eax, FLAG_CPUID0_EBX_HYGON
529+
cmp eax, ebx
530+
jne _%1_check_avx2 ; Not Hygon. Proceed as normal
531+
532+
mov eax, FLAG_CPUID0_EDX_HYGON
533+
cmp eax, edx
534+
jne _%1_check_avx2 ; Not Hygon. Proceed as normal
535+
536+
mov eax, FLAG_CPUID0_ECX_HYGON
537+
cmp eax, ecx
538+
jne _%1_check_avx2 ; Not Hygon. Proceed as normal
539+
540+
;; All vendor ID matches: Hygon confirmed
541+
;; Further family & model check: Identify Hygon 1/2/3
542+
mov eax, 1
543+
cpuid
544+
and eax, FLAG_CPUID1_EAX_STEP_MASK ; mask eax before comparing with the Hygon 1/2/3 values
545+
mov ecx, FLAG_CPUID1_EAX_HYGON1
546+
mov edx, FLAG_CPUID1_EAX_HYGON2
547+
mov ebx, FLAG_CPUID1_EAX_HYGON3
548+
549+
cmp eax, ecx ; Hygon 1
550+
je _%1_hygon_123_init
551+
cmp eax, edx ; Hygon 2
552+
je _%1_hygon_123_init
553+
cmp eax, ebx ; Hygon 3
554+
jne _%1_check_avx2 ; Not any of Hygon 1/2/3: Continue normal procedure
555+
556+
_%1_hygon_123_init:
557+
;; Init complete early for Hygon 1/2/3.
558+
jmp _%1_init_done ; Use AVX opt func registered before
559+
560+
_%1_check_avx2:
561+
;; Test for AVX2
562+
xor ecx, ecx ; ecx = 0 as input to CPUID leaf 7
563+
mov eax, 7
564+
cpuid
565+
test ebx, FLAG_CPUID7_EBX_AVX2
566+
je _%1_init_done ; No AVX2 possible
567+
lea rsi, [%5 WRT_OPT] ; AVX2/04 opt func
568+
569+
;; Test for AVX512
570+
and edi, FLAG_XGETBV_EAX_ZMM_OPM ; edi = xgetbv.eax saved earlier
571+
cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
572+
jne _%1_check_avx2_g2 ; No AVX512 possible
573+
and ebx, FLAGS_CPUID7_EBX_AVX512_G1
574+
cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
575+
lea rbx, [%6 WRT_OPT] ; AVX512/06 opt
576+
cmove rsi, rbx ; taken only if all AVX512_G1 bits are set (lea does not modify flags)
577+
578+
and ecx, FLAGS_CPUID7_ECX_AVX512_G2
579+
cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
580+
lea rbx, [%8 WRT_OPT] ; AVX512/10 opt
581+
cmove rsi, rbx ; taken only if all AVX512_G2 bits are set
582+
jmp _%1_init_done
583+
584+
_%1_check_avx2_g2:
585+
;; Test for AVX2 Gen 2
586+
and ecx, FLAGS_CPUID7_ECX_AVX2_G2
587+
cmp ecx, FLAGS_CPUID7_ECX_AVX2_G2
588+
lea rbx, [%7 WRT_OPT] ; AVX2/7 opt
589+
cmove rsi, rbx ; taken only if all AVX2_G2 bits are set
590+
591+
_%1_init_done:
592+
pop rdi
593+
pop rdx
594+
pop rcx
595+
pop rbx
596+
pop rax
597+
mov [%1_dispatched], rsi ; store the selected implementation before rsi is restored
598+
pop rsi
599+
ret
600+
%endmacro
601+
478602
%endif ; ifndef _MULTIBINARY_ASM_

include/reg_sizes.asm

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,14 @@
7070
%define FLAG_XGETBV_EAX_XMM_YMM 0x6
7171
%define FLAG_XGETBV_EAX_ZMM_OPM 0xe0
7272

73-
%define FLAG_CPUID1_EAX_STEP_MASK 0xfffffff0
73+
%define FLAG_CPUID1_EAX_STEP_MASK 0xfffffff0
74+
; CPUID.1 EAX values, after masking with FLAG_CPUID1_EAX_STEP_MASK, that identify Hygon 1/2/3
%define FLAG_CPUID1_EAX_HYGON1 0x00900f00
75+
%define FLAG_CPUID1_EAX_HYGON2 0x00900f10
76+
%define FLAG_CPUID1_EAX_HYGON3 0x00900f20
77+
78+
; CPUID.0 vendor ID "HygonGenuine" as returned in EBX/EDX/ECX (quoted text shows bytes high-to-low)
%define FLAG_CPUID0_EBX_HYGON 0x6f677948 ;"ogyH" = "Hygo"
79+
%define FLAG_CPUID0_EDX_HYGON 0x6e65476e ;"neGn" = "nGen"
80+
%define FLAG_CPUID0_ECX_HYGON 0x656e6975 ;"eniu" = "uine"
7481

7582
; define d and w variants for registers
7683

0 commit comments

Comments
 (0)