;; Machine description for AArch64 SVE2.
;; Copyright (C) 2019-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The file is organised into the following sections (search for the full
;; line):
;;
;; == Moves
;; ---- Non-temporal gather loads
;; ---- Non-temporal scatter stores
;;
;; == Uniform binary arithmetic
;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
;; ---- [INT] Saturating binary arithmetic
;; ---- [INT] Saturating left shifts
;;
;; == Uniform ternary arithmetic
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; ---- [INT] Multiply-and-accumulate operations
;; ---- [INT] Binary logic operations with rotation
;; ---- [INT] Ternary logic operations
;; ---- [INT] Shift-and-accumulate operations
;; ---- [INT] Shift-and-insert operations
;; ---- [INT] Sum of absolute differences
;;
;; == Extending arithmetic
;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
;; ---- [INT] Long left shifts
;; ---- [INT] Long binary arithmetic with accumulation
;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
;; ---- [INT] Narrowing unary arithmetic
;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
;;
;; == Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic
;; ---- [FP] Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic with accumulation
;;
;; == Complex arithmetic
;; ---- [INT] Complex binary operations
;; ---- [INT] Complex ternary operations
;; ---- [INT] Complex dot product
;;
;; == Conversions
;; ---- [FP<-FP] Widening conversions
;; ---- [FP<-FP] Narrowing conversions
;;
;; == Other arithmetic
;; ---- [INT] Reciprocal approximation
;; ---- [INT<-FP] Base-2 logarithm
;; ---- [INT] Polynomial multiplication
;;
;; == Permutation
;; ---- [INT,FP] General permutes
;; ---- [INT] Optional bit-permute extensions
;;
;; == General
;; ---- Check for aliases between pointers
;; ---- Histogram processing
;; ---- String matching
;;
;; == Cryptographic extensions
;; ---- Optional AES extensions
;; ---- Optional SHA-3 extensions
;; ---- Optional SM4 extensions

;; =========================================================================
;; == Moves
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Non-temporal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------

;; Non-extending loads.
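;; As an illustrative example (not taken from the original comments) of the
;; addressing form used below: for 32-bit elements,
;;   ldnt1w  z0.s, p1/z, [z3.s, x2]
;; loads one word per active lane, using each lane of z3.s as the base
;; address and x2 as an optional scalar byte offset.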
(define_insn "@aarch64_gather_ldnt" [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w") (unspec:SVE_FULL_SD [(match_operand: 1 "register_operand" "Upl, Upl") (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r") (match_operand: 3 "register_operand" "w, w") (mem:BLK (scratch))] UNSPEC_LDNT1_GATHER))] "TARGET_SVE2" "@ ldnt1\t%0., %1/z, [%3.] ldnt1\t%0., %1/z, [%3., %2]" ) ;; Extending loads. (define_insn_and_rewrite "@aarch64_gather_ldnt_" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w") (unspec:SVE_FULL_SDI [(match_operand: 4 "general_operand" "UplDnm, UplDnm") (ANY_EXTEND:SVE_FULL_SDI (unspec:SVE_PARTIAL_I [(match_operand: 1 "register_operand" "Upl, Upl") (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r") (match_operand: 3 "register_operand" "w, w") (mem:BLK (scratch))] UNSPEC_LDNT1_GATHER))] UNSPEC_PRED_X))] "TARGET_SVE2 && (~ & ) == 0" "@ ldnt1\t%0., %1/z, [%3.] ldnt1\t%0., %1/z, [%3., %2]" "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } ) ;; ------------------------------------------------------------------------- ;; ---- Non-temporal scatter stores ;; ------------------------------------------------------------------------- ;; Includes scatter forms of: ;; - STNT1B ;; - STNT1D ;; - STNT1H ;; - STNT1W ;; ------------------------------------------------------------------------- ;; Non-truncating stores. (define_insn "@aarch64_scatter_stnt" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand: 0 "register_operand" "Upl, Upl") (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r") (match_operand: 2 "register_operand" "w, w") (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")] UNSPEC_STNT1_SCATTER))] "TARGET_SVE" "@ stnt1\t%3., %0, [%2.] stnt1\t%3., %0, [%2., %1]" ) ;; Truncating stores. (define_insn "@aarch64_scatter_stnt_" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand: 0 "register_operand" "Upl, Upl") (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r") (match_operand: 2 "register_operand" "w, w") (truncate:SVE_PARTIAL_I (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))] UNSPEC_STNT1_SCATTER))] "TARGET_SVE2 && (~ & ) == 0" "@ stnt1\t%3., %0, [%2.] stnt1\t%3., %0, [%2., %1]" ) ;; ========================================================================= ;; == Uniform binary arithmnetic ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- [INT] Multiplication ;; ------------------------------------------------------------------------- ;; Includes the lane forms of: ;; - MUL ;; ------------------------------------------------------------------------- (define_insn "@aarch64_mul_lane_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") (mult:SVE_FULL_HSDI (unspec:SVE_FULL_HSDI [(match_operand:SVE_FULL_HSDI 2 "register_operand" "") (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT) (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))] "TARGET_SVE2" "mul\t%0., %1., %2.[%3]" ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Scaled high-part multiplication ;; ------------------------------------------------------------------------- ;; The patterns in this section are synthetic. ;; ------------------------------------------------------------------------- ;; Unpredicated integer multiply-high-with-(round-and-)scale. 
(define_expand "mulhs3" [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") (unspec:SVE_FULL_BHSI [(match_dup 3) (unspec:SVE_FULL_BHSI [(match_operand:SVE_FULL_BHSI 1 "register_operand") (match_operand:SVE_FULL_BHSI 2 "register_operand")] MULHRS)] UNSPEC_PRED_X))] "TARGET_SVE2" { operands[3] = aarch64_ptrue_reg (mode); rtx prod_b = gen_reg_rtx (mode); rtx prod_t = gen_reg_rtx (mode); emit_insn (gen_aarch64_sve_mullb (prod_b, operands[1], operands[2])); emit_insn (gen_aarch64_sve_mullt (prod_t, operands[1], operands[2])); rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1); emit_insn (gen_aarch64_sve_shrnb (operands[0], prod_b, shift)); emit_insn (gen_aarch64_sve_shrnt (operands[0], operands[0], prod_t, shift)); DONE; } ) ;; ------------------------------------------------------------------------- ;; ---- [INT] General binary arithmetic that maps to unspecs ;; ------------------------------------------------------------------------- ;; Includes: ;; - SHADD ;; - SHSUB ;; - SHSUBR ;; - SQRSHL ;; - SQRSHLR ;; - SRHADD ;; - SRSHL ;; - SRSHLR ;; - SUQADD ;; - UHADD ;; - UHSUB ;; - UHSUBR ;; - UQRSHL ;; - UQRSHLR ;; - URHADD ;; - URSHL ;; - URSHLR ;; - USQADD ;; ------------------------------------------------------------------------- ;; Integer average (floor). (define_expand "avg3_floor" [(set (match_operand:SVE_FULL_I 0 "register_operand") (unspec:SVE_FULL_I [(match_dup 3) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 1 "register_operand") (match_operand:SVE_FULL_I 2 "register_operand")] HADD)] UNSPEC_PRED_X))] "TARGET_SVE2" { operands[3] = force_reg (mode, CONSTM1_RTX (mode)); } ) ;; Integer average (rounding). (define_expand "avg3_ceil" [(set (match_operand:SVE_FULL_I 0 "register_operand") (unspec:SVE_FULL_I [(match_dup 3) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 1 "register_operand") (match_operand:SVE_FULL_I 2 "register_operand")] RHADD)] UNSPEC_PRED_X))] "TARGET_SVE2" { operands[3] = force_reg (mode, CONSTM1_RTX (mode)); } ) ;; The immediate form of SQADD acts as an immediate form of SUQADD ;; over its full range. In contrast to the ss_plus pattern, we do ;; not need to treat byte immediates specially. E.g.: ;; ;; SQADD Z0.B, Z0.B, #128 ;; ;; is equivalent to: ;; ;; MOV Z1.B, #128 ;; SUQADD Z0.B, P0/M, Z0.B, Z1.B ;; ;; even though it's not equivalent to: ;; ;; MOV Z1.B, #128 ;; SQADD Z0.B, P0/M, Z0.B, Z1.B // Saturating subtraction of 128 (define_insn "@aarch64_sve_suqadd_const" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")] UNSPEC_SUQADD))] "TARGET_SVE2" "@ sqadd\t%0., %0., #%D2 movprfx\t%0, %1\;sqadd\t%0., %0., #%D2" [(set_attr "movprfx" "*,yes")] ) ;; General predicated binary arithmetic. All operations handled here ;; are commutative or have a reversed form. (define_insn "@aarch64_pred_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")] SVE2_COND_INT_BINARY_REV)] UNSPEC_PRED_X))] "TARGET_SVE2" "@ \t%0., %1/m, %0., %3. \t%0., %1/m, %0., %2. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." [(set_attr "movprfx" "*,*,yes")] ) ;; Predicated binary arithmetic with merging. 
(define_expand "@cond_" [(set (match_operand:SVE_FULL_I 0 "register_operand") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand") (unspec:SVE_FULL_I [(match_dup 5) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand") (match_operand:SVE_FULL_I 3 "register_operand")] SVE2_COND_INT_BINARY)] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2" { operands[5] = CONSTM1_RTX (mode); } ) ;; Predicated binary arithmetic, merging with the first input. (define_insn_and_rewrite "*cond__2" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_I [(match_operand 4) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] SVE2_COND_INT_BINARY)] UNSPEC_PRED_X) (match_dup 2)] UNSPEC_SEL))] "TARGET_SVE2" "@ \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Predicated binary arithmetic, merging with the second input. (define_insn_and_rewrite "*cond__3" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_I [(match_operand 4) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") (match_operand:SVE_FULL_I 3 "register_operand" "0, w")] SVE2_COND_INT_BINARY_REV)] UNSPEC_PRED_X) (match_dup 3)] UNSPEC_SEL))] "TARGET_SVE2" "@ \t%0., %1/m, %0., %2. movprfx\t%0, %3\;\t%0., %1/m, %0., %2." "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Predicated binary operations, merging with an independent value. (define_insn_and_rewrite "*cond__any" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_I [(match_operand 5) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")] SVE2_COND_INT_BINARY_REV)] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[4]) && !rtx_equal_p (operands[3], operands[4])" "@ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. #" "&& 1" { if (reload_completed && register_operand (operands[4], mode) && !rtx_equal_p (operands[0], operands[4])) { emit_insn (gen_vcond_mask_ (operands[0], operands[2], operands[4], operands[1])); operands[4] = operands[2] = operands[0]; } else if (!CONSTANT_P (operands[5])) operands[5] = CONSTM1_RTX (mode); else FAIL; } [(set_attr "movprfx" "yes")] ) ;; Predicated binary operations with no reverse form, merging with zero. ;; At present we don't generate these patterns via a cond_* optab, ;; so there's no correctness requirement to handle merging with an ;; independent value. 
(define_insn_and_rewrite "*cond__z" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_I [(match_operand 5) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] SVE2_COND_INT_BINARY_NOREV)] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] UNSPEC_SEL))] "TARGET_SVE2" "@ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." "&& !CONSTANT_P (operands[5])" { operands[5] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Saturating binary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - SQDMULH ;; - SQRDMULH ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 1 "register_operand" "w") (match_operand:SVE_FULL_I 2 "register_operand" "w")] SVE2_INT_BINARY))] "TARGET_SVE2" "\t%0., %1., %2." ) (define_insn "@aarch64_sve__lane_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") (unspec:SVE_FULL_HSDI [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") (unspec:SVE_FULL_HSDI [(match_operand:SVE_FULL_HSDI 2 "register_operand" "") (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SVE2_INT_BINARY_LANE))] "TARGET_SVE2" "\t%0., %1., %2.[%3]" ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Saturating left shifts ;; ------------------------------------------------------------------------- ;; Includes: ;; - SQSHL ;; - SQSHLR ;; - UQSHL ;; - UQSHLR ;; ------------------------------------------------------------------------- ;; Predicated left shifts. (define_insn "@aarch64_pred_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w") (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" "D, w, 0, D, w")] SVE2_COND_INT_SHIFT)] UNSPEC_PRED_X))] "TARGET_SVE2" "@ \t%0., %1/m, %0., #%3 \t%0., %1/m, %0., %3. r\t%0., %1/m, %0., %2. movprfx\t%0, %2\;\t%0., %1/m, %0., #%3 movprfx\t%0, %2\;\t%0., %1/m, %0., %3." [(set_attr "movprfx" "*,*,*,yes,yes")] ) ;; Predicated left shifts with merging. (define_expand "@cond_" [(set (match_operand:SVE_FULL_I 0 "register_operand") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand") (unspec:SVE_FULL_I [(match_dup 5) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand") (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand")] SVE2_COND_INT_SHIFT)] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 4 "register_operand")] UNSPEC_SEL))] "TARGET_SVE2" { operands[5] = CONSTM1_RTX (mode); } ) ;; Predicated left shifts, merging with the first input. 
(define_insn_and_rewrite "*cond__2" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_I [(match_operand 4) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w") (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" "D, w, D, w")] SVE2_COND_INT_SHIFT)] UNSPEC_PRED_X) (match_dup 2)] UNSPEC_SEL))] "TARGET_SVE2" "@ \t%0., %1/m, %0., #%3 \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., #%3 movprfx\t%0, %2\;\t%0., %1/m, %0., %3." "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,*,yes,yes")] ) ;; Predicated left shifts, merging with the second input. (define_insn_and_rewrite "*cond__3" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_I [(match_operand 4) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") (match_operand:SVE_FULL_I 3 "register_operand" "0, w")] SVE2_COND_INT_SHIFT)] UNSPEC_PRED_X) (match_dup 3)] UNSPEC_SEL))] "TARGET_SVE2" "@ r\t%0., %1/m, %0., %2. movprfx\t%0, %3\;r\t%0., %1/m, %0., %2." "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Predicated left shifts, merging with an independent value. (define_insn_and_rewrite "*cond__any" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_I [(match_operand 5) (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w") (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" "D, w, 0, D, w, D, w, D, w")] SVE2_COND_INT_SHIFT)] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[4]) && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))" "@ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., #%3 movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %0.\;r\t%0., %1/m, %0., %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. 
# #" "&& 1" { if (reload_completed && register_operand (operands[4], mode) && !rtx_equal_p (operands[0], operands[4])) { emit_insn (gen_vcond_mask_ (operands[0], operands[2], operands[4], operands[1])); operands[4] = operands[2] = operands[0]; } else if (!CONSTANT_P (operands[5])) operands[5] = CONSTM1_RTX (mode); else FAIL; } [(set_attr "movprfx" "yes")] ) ;; ========================================================================= ;; == Uniform ternary arithmnetic ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- [INT] General ternary arithmetic that maps to unspecs ;; ------------------------------------------------------------------------- ;; Includes: ;; - ADCLB ;; - ADCLT ;; - EORBT ;; - EORTB ;; - SBCLB ;; - SBCLT ;; - SQRDMLAH ;; - SQRDMLSH ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, w") (match_operand:SVE_FULL_I 1 "register_operand" "0, w")] SVE2_INT_TERNARY))] "TARGET_SVE2" "@ \t%0., %2., %3. movprfx\t%0, %1\;\t%0., %2., %3." [(set_attr "movprfx" "*,yes")] ) (define_insn "@aarch64_sve__lane_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_HSDI [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w") (unspec:SVE_FULL_HSDI [(match_operand:SVE_FULL_HSDI 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT) (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")] SVE2_INT_TERNARY_LANE))] "TARGET_SVE2" "@ \t%0., %2., %3.[%4] movprfx\t%0, %1\;\t%0., %2., %3.[%4]" [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Multiply-and-accumulate operations ;; ------------------------------------------------------------------------- ;; Includes the lane forms of: ;; - MLA ;; - MLS ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_add_mul_lane_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") (plus:SVE_FULL_HSDI (mult:SVE_FULL_HSDI (unspec:SVE_FULL_HSDI [(match_operand:SVE_FULL_HSDI 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT) (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")) (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ mla\t%0., %2., %3.[%4] movprfx\t%0, %1\;mla\t%0., %2., %3.[%4]" [(set_attr "movprfx" "*,yes")] ) (define_insn "@aarch64_sve_sub_mul_lane_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") (minus:SVE_FULL_HSDI (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w") (mult:SVE_FULL_HSDI (unspec:SVE_FULL_HSDI [(match_operand:SVE_FULL_HSDI 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT) (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))] "TARGET_SVE2" "@ mls\t%0., %2., %3.[%4] movprfx\t%0, %1\;mls\t%0., %2., %3.[%4]" [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Binary logic operations with rotation ;; ------------------------------------------------------------------------- ;; Includes: ;; - XAR ;; 
------------------------------------------------------------------------- (define_insn "@aarch64_sve2_xar" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (rotatert:SVE_FULL_I (xor:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" "%0, w") (match_operand:SVE_FULL_I 2 "register_operand" "w, w")) (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))] "TARGET_SVE2" "@ xar\t%0., %0., %2., #%3 movprfx\t%0, %1\;xar\t%0., %0., %2., #%3" [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Ternary logic operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - BCAX ;; - BSL ;; - BSL1N ;; - BSL2N ;; - EOR3 ;; - NBSL ;; ------------------------------------------------------------------------- ;; Unpredicated exclusive OR of AND. (define_expand "@aarch64_sve2_bcax" [(set (match_operand:SVE_FULL_I 0 "register_operand") (xor:SVE_FULL_I (and:SVE_FULL_I (unspec:SVE_FULL_I [(match_dup 4) (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 2 "register_operand")) (match_operand:SVE_FULL_I 1 "register_operand")))] "TARGET_SVE2" { operands[4] = CONSTM1_RTX (mode); } ) (define_insn_and_rewrite "*aarch64_sve2_bcax" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (xor:SVE_FULL_I (and:SVE_FULL_I (unspec:SVE_FULL_I [(match_operand 4) (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 2 "register_operand" "w, w")) (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ bcax\t%0.d, %0.d, %2.d, %3.d movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d" "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Unpredicated 3-way exclusive OR. (define_insn "@aarch64_sve2_eor3" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w") (xor:SVE_FULL_I (xor:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w") (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")) (match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))] "TARGET_SVE2" "@ eor3\t%0.d, %0.d, %2.d, %3.d eor3\t%0.d, %0.d, %1.d, %3.d eor3\t%0.d, %0.d, %1.d, %2.d movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d" [(set_attr "movprfx" "*,*,*,yes")] ) ;; Use NBSL for vector NOR. (define_insn_and_rewrite "*aarch64_sve2_nor" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand 3) (and:SVE_FULL_I (not:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")) (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))] UNSPEC_PRED_X))] "TARGET_SVE2" "@ nbsl\t%0.d, %0.d, %2.d, %0.d movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d" "&& !CONSTANT_P (operands[3])" { operands[3] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Use NBSL for vector NAND. (define_insn_and_rewrite "*aarch64_sve2_nand" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand 3) (ior:SVE_FULL_I (not:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")) (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))] UNSPEC_PRED_X))] "TARGET_SVE2" "@ nbsl\t%0.d, %0.d, %2.d, %2.d movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d" "&& !CONSTANT_P (operands[3])" { operands[3] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Unpredicated bitwise select. ;; (op3 ? 
bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup) (define_expand "@aarch64_sve2_bsl" [(set (match_operand:SVE_FULL_I 0 "register_operand") (xor:SVE_FULL_I (and:SVE_FULL_I (xor:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand") (match_operand:SVE_FULL_I 2 "register_operand")) (match_operand:SVE_FULL_I 3 "register_operand")) (match_dup 2)))] "TARGET_SVE2" ) (define_insn "*aarch64_sve2_bsl" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (xor:SVE_FULL_I (and:SVE_FULL_I (xor:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" ", w") (match_operand:SVE_FULL_I 2 "register_operand" ", w")) (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) (match_dup BSL_DUP)))] "TARGET_SVE2" "@ bsl\t%0.d, %0.d, %.d, %3.d movprfx\t%0, %\;bsl\t%0.d, %0.d, %.d, %3.d" [(set_attr "movprfx" "*,yes")] ) ;; Unpredicated bitwise inverted select. ;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)) (define_expand "@aarch64_sve2_nbsl" [(set (match_operand:SVE_FULL_I 0 "register_operand") (unspec:SVE_FULL_I [(match_dup 4) (not:SVE_FULL_I (xor:SVE_FULL_I (and:SVE_FULL_I (xor:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand") (match_operand:SVE_FULL_I 2 "register_operand")) (match_operand:SVE_FULL_I 3 "register_operand")) (match_dup 2)))] UNSPEC_PRED_X))] "TARGET_SVE2" { operands[4] = CONSTM1_RTX (mode); } ) (define_insn_and_rewrite "*aarch64_sve2_nbsl" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand 4) (not:SVE_FULL_I (xor:SVE_FULL_I (and:SVE_FULL_I (xor:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" ", w") (match_operand:SVE_FULL_I 2 "register_operand" ", w")) (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) (match_dup BSL_DUP)))] UNSPEC_PRED_X))] "TARGET_SVE2" "@ nbsl\t%0.d, %0.d, %.d, %3.d movprfx\t%0, %\;nbsl\t%0.d, %0.d, %.d, %3.d" "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Unpredicated bitwise select with inverted first operand. ;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup) (define_expand "@aarch64_sve2_bsl1n" [(set (match_operand:SVE_FULL_I 0 "register_operand") (xor:SVE_FULL_I (and:SVE_FULL_I (unspec:SVE_FULL_I [(match_dup 4) (not:SVE_FULL_I (xor:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand") (match_operand:SVE_FULL_I 2 "register_operand")))] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 3 "register_operand")) (match_dup 2)))] "TARGET_SVE2" { operands[4] = CONSTM1_RTX (mode); } ) (define_insn_and_rewrite "*aarch64_sve2_bsl1n" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (xor:SVE_FULL_I (and:SVE_FULL_I (unspec:SVE_FULL_I [(match_operand 4) (not:SVE_FULL_I (xor:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" ", w") (match_operand:SVE_FULL_I 2 "register_operand" ", w")))] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) (match_dup BSL_DUP)))] "TARGET_SVE2" "@ bsl1n\t%0.d, %0.d, %.d, %3.d movprfx\t%0, %\;bsl1n\t%0.d, %0.d, %.d, %3.d" "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Unpredicated bitwise select with inverted second operand. ;; (bsl_dup ? 
bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup)) (define_expand "@aarch64_sve2_bsl2n" [(set (match_operand:SVE_FULL_I 0 "register_operand") (ior:SVE_FULL_I (and:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand") (match_operand:SVE_FULL_I 3 "register_operand")) (unspec:SVE_FULL_I [(match_dup 4) (and:SVE_FULL_I (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand")) (not:SVE_FULL_I (match_dup 3)))] UNSPEC_PRED_X)))] "TARGET_SVE2" { operands[4] = CONSTM1_RTX (mode); } ) (define_insn_and_rewrite "*aarch64_sve2_bsl2n" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (ior:SVE_FULL_I (and:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" ", w") (match_operand:SVE_FULL_I 2 "register_operand" ", w")) (unspec:SVE_FULL_I [(match_operand 4) (and:SVE_FULL_I (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) (not:SVE_FULL_I (match_dup BSL_DUP)))] UNSPEC_PRED_X)))] "TARGET_SVE2" "@ bsl2n\t%0.d, %0.d, %3.d, %.d movprfx\t%0, %\;bsl2n\t%0.d, %0.d, %3.d, %.d" "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Unpredicated bitwise select with inverted second operand, alternative form. ;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3)) (define_insn_and_rewrite "*aarch64_sve2_bsl2n" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (ior:SVE_FULL_I (and:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand" ", w") (match_operand:SVE_FULL_I 2 "register_operand" ", w")) (unspec:SVE_FULL_I [(match_operand 4) (and:SVE_FULL_I (not:SVE_FULL_I (match_dup BSL_DUP)) (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))] UNSPEC_PRED_X)))] "TARGET_SVE2" "@ bsl2n\t%0.d, %0.d, %3.d, %.d movprfx\t%0, %\;bsl2n\t%0.d, %0.d, %3.d, %.d" "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Shift-and-accumulate operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - SRSRA ;; - SSRA ;; - URSRA ;; - USRA ;; ------------------------------------------------------------------------- ;; Provide the natural unpredicated interface for SSRA and USRA. (define_expand "@aarch64_sve_add_" [(set (match_operand:SVE_FULL_I 0 "register_operand") (plus:SVE_FULL_I (unspec:SVE_FULL_I [(match_dup 4) (SHIFTRT:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand") (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 1 "register_operand")))] "TARGET_SVE2" { operands[4] = CONSTM1_RTX (mode); } ) ;; Pattern-match SSRA and USRA as a predicated operation whose predicate ;; isn't needed. (define_insn_and_rewrite "*aarch64_sve2_sra" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (plus:SVE_FULL_I (unspec:SVE_FULL_I [(match_operand 4) (SHIFTRT:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand" "w, w") (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))] UNSPEC_PRED_X) (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ sra\t%0., %2., #%3 movprfx\t%0, %1\;sra\t%0., %2., #%3" "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; SRSRA and URSRA. 
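;; As an illustrative example (not from the original comments):
;;   ursra  z0.b, z1.b, #3
;; accumulates a rounding right shift, roughly z0.b += ((z1.b + 4) >> 3),
;; where 4 is the rounding constant 1 << (3 - 1) and the intermediate sum
;; is evaluated without overflow.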
(define_insn "@aarch64_sve_add_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (plus:SVE_FULL_I (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 2 "register_operand" "w, w") (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")] VRSHR_N) (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ sra\t%0., %2., #%3 movprfx\t%0, %1\;sra\t%0., %2., #%3" [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Shift-and-insert operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - SLI ;; - SRI ;; ------------------------------------------------------------------------- ;; These instructions do not take MOVPRFX. (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 1 "register_operand" "0") (match_operand:SVE_FULL_I 2 "register_operand" "w") (match_operand:SVE_FULL_I 3 "aarch64_simd_shift_imm")] SVE2_INT_SHIFT_INSERT))] "TARGET_SVE2" "\t%0., %2., #%3" ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Sum of absolute differences ;; ------------------------------------------------------------------------- ;; Includes: ;; - SABA ;; - UABA ;; ------------------------------------------------------------------------- ;; Provide the natural unpredicated interface for SABA and UABA. (define_expand "@aarch64_sve2_aba" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (plus:SVE_FULL_I (minus:SVE_FULL_I (unspec:SVE_FULL_I [(match_dup 4) (USMAX:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand" "w, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] UNSPEC_PRED_X) (unspec:SVE_FULL_I [(match_dup 4) (:SVE_FULL_I (match_dup 2) (match_dup 3))] UNSPEC_PRED_X)) (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] "TARGET_SVE2" { operands[4] = CONSTM1_RTX (mode); } ) ;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate ;; operation whose predicates aren't needed. (define_insn "*aarch64_sve2_aba" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (plus:SVE_FULL_I (minus:SVE_FULL_I (unspec:SVE_FULL_I [(match_operand 4) (USMAX:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand" "w, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] UNSPEC_PRED_X) (unspec:SVE_FULL_I [(match_operand 5) (:SVE_FULL_I (match_dup 2) (match_dup 3))] UNSPEC_PRED_X)) (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ aba\t%0., %2., %3. movprfx\t%0, %1\;aba\t%0., %2., %3." [(set_attr "movprfx" "*,yes")] ) ;; ========================================================================= ;; == Extending arithmetic ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- [INT] Wide binary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - SADDWB ;; - SADDWT ;; - SSUBWB ;; - SSUBWT ;; - UADDWB ;; - UADDWT ;; - USUBWB ;; - USUBWT ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") (unspec:SVE_FULL_HSDI [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") (match_operand: 2 "register_operand" "w")] SVE2_INT_BINARY_WIDE))] "TARGET_SVE2" "\t%0., %1., %2." 
) ;; ------------------------------------------------------------------------- ;; ---- [INT] Long binary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - SABDLB ;; - SABDLT ;; - SADDLB ;; - SADDLBT ;; - SADDLT ;; - SMULLB ;; - SMULLT ;; - SQDMULLB ;; - SQDMULLT ;; - SSUBLB ;; - SSUBLBT ;; - SSUBLT ;; - SSUBLTB ;; - UABDLB ;; - UABDLT ;; - UADDLB ;; - UADDLT ;; - UMULLB ;; - UMULLT ;; - USUBLB ;; - USUBLT ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") (unspec:SVE_FULL_HSDI [(match_operand: 1 "register_operand" "w") (match_operand: 2 "register_operand" "w")] SVE2_INT_BINARY_LONG))] "TARGET_SVE2" "\t%0., %1., %2." ) (define_insn "@aarch64_sve__lane_" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") (unspec:SVE_FULL_SDI [(match_operand: 1 "register_operand" "w") (unspec: [(match_operand: 2 "register_operand" "") (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SVE2_INT_BINARY_LONG_LANE))] "TARGET_SVE2" "\t%0., %1., %2.[%3]" ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Long left shifts ;; ------------------------------------------------------------------------- ;; Includes: ;; - SSHLLB ;; - SSHLLT ;; - USHLLB ;; - USHLLT ;; ------------------------------------------------------------------------- ;; The immediate range is enforced before generating the instruction. (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") (unspec:SVE_FULL_HSDI [(match_operand: 1 "register_operand" "w") (match_operand:DI 2 "const_int_operand")] SVE2_INT_SHIFT_IMM_LONG))] "TARGET_SVE2" "\t%0., %1., #%2" ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Long binary arithmetic with accumulation ;; ------------------------------------------------------------------------- ;; Includes: ;; - SABALB ;; - SABALT ;; - SMLALB ;; - SMLALT ;; - SMLSLB ;; - SMLSLT ;; - SQDMLALB ;; - SQDMLALBT ;; - SQDMLALT ;; - SQDMLSLB ;; - SQDMLSLBT ;; - SQDMLSLT ;; - UABALB ;; - UABALT ;; - UMLALB ;; - UMLALT ;; - UMLSLB ;; - UMLSLT ;; ------------------------------------------------------------------------- ;; Non-saturating MLA operations. (define_insn "@aarch64_sve_add_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") (plus:SVE_FULL_HSDI (unspec:SVE_FULL_HSDI [(match_operand: 2 "register_operand" "w, w") (match_operand: 3 "register_operand" "w, w")] SVE2_INT_ADD_BINARY_LONG) (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ \t%0., %2., %3. movprfx\t%0, %1\;\t%0., %2., %3." [(set_attr "movprfx" "*,yes")] ) ;; Non-saturating MLA operations with lane select. (define_insn "@aarch64_sve_add__lane_" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") (plus:SVE_FULL_SDI (unspec:SVE_FULL_SDI [(match_operand: 2 "register_operand" "w, w") (unspec: [(match_operand: 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SVE2_INT_ADD_BINARY_LONG_LANE) (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ \t%0., %2., %3.[%4] movprfx\t%0, %1\;\t%0., %2., %3.[%4]" [(set_attr "movprfx" "*,yes")] ) ;; Saturating MLA operations. 
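;; As an illustrative example (not from the original comments):
;;   sqdmlalb  z0.h, z1.b, z2.b
;; multiplies the even (bottom) byte elements of z1 and z2, doubles the
;; widened products and adds them to z0.h, saturating both steps.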
(define_insn "@aarch64_sve_qadd_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") (ss_plus:SVE_FULL_HSDI (unspec:SVE_FULL_HSDI [(match_operand: 2 "register_operand" "w, w") (match_operand: 3 "register_operand" "w, w")] SVE2_INT_QADD_BINARY_LONG) (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ \t%0., %2., %3. movprfx\t%0, %1\;\t%0., %2., %3." [(set_attr "movprfx" "*,yes")] ) ;; Saturating MLA operations with lane select. (define_insn "@aarch64_sve_qadd__lane_" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") (ss_plus:SVE_FULL_SDI (unspec:SVE_FULL_SDI [(match_operand: 2 "register_operand" "w, w") (unspec: [(match_operand: 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SVE2_INT_QADD_BINARY_LONG_LANE) (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ \t%0., %2., %3.[%4] movprfx\t%0, %1\;\t%0., %2., %3.[%4]" [(set_attr "movprfx" "*,yes")] ) ;; Non-saturating MLS operations. (define_insn "@aarch64_sve_sub_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") (minus:SVE_FULL_HSDI (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w") (unspec:SVE_FULL_HSDI [(match_operand: 2 "register_operand" "w, w") (match_operand: 3 "register_operand" "w, w")] SVE2_INT_SUB_BINARY_LONG)))] "TARGET_SVE2" "@ \t%0., %2., %3. movprfx\t%0, %1\;\t%0., %2., %3." [(set_attr "movprfx" "*,yes")] ) ;; Non-saturating MLS operations with lane select. (define_insn "@aarch64_sve_sub__lane_" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") (minus:SVE_FULL_SDI (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w") (unspec:SVE_FULL_SDI [(match_operand: 2 "register_operand" "w, w") (unspec: [(match_operand: 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SVE2_INT_SUB_BINARY_LONG_LANE)))] "TARGET_SVE2" "@ \t%0., %2., %3.[%4] movprfx\t%0, %1\;\t%0., %2., %3.[%4]" [(set_attr "movprfx" "*,yes")] ) ;; Saturating MLS operations. (define_insn "@aarch64_sve_qsub_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") (ss_minus:SVE_FULL_HSDI (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w") (unspec:SVE_FULL_HSDI [(match_operand: 2 "register_operand" "w, w") (match_operand: 3 "register_operand" "w, w")] SVE2_INT_QSUB_BINARY_LONG)))] "TARGET_SVE2" "@ \t%0., %2., %3. movprfx\t%0, %1\;\t%0., %2., %3." [(set_attr "movprfx" "*,yes")] ) ;; Saturating MLS operations with lane select. 
(define_insn "@aarch64_sve_qsub__lane_" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") (ss_minus:SVE_FULL_SDI (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w") (unspec:SVE_FULL_SDI [(match_operand: 2 "register_operand" "w, w") (unspec: [(match_operand: 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SVE2_INT_QSUB_BINARY_LONG_LANE)))] "TARGET_SVE2" "@ \t%0., %2., %3.[%4] movprfx\t%0, %1\;\t%0., %2., %3.[%4]" [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [FP] Long multiplication with accumulation ;; ------------------------------------------------------------------------- ;; Includes: ;; - FMLALB ;; - FMLALT ;; - FMLSLB ;; - FMLSLT ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w") (unspec:VNx4SF_ONLY [(match_operand: 1 "register_operand" "w, w") (match_operand: 2 "register_operand" "w, w") (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")] SVE2_FP_TERNARY_LONG))] "TARGET_SVE2" "@ \t%0., %1., %2. movprfx\t%0, %3\;\t%0., %1., %2." [(set_attr "movprfx" "*,yes")] ) (define_insn "@aarch64__lane_" [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w") (unspec:VNx4SF_ONLY [(match_operand: 1 "register_operand" "w, w") (unspec: [(match_operand: 2 "register_operand" ", ") (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT) (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")] SVE2_FP_TERNARY_LONG_LANE))] "TARGET_SVE2" "@ \t%0., %1., %2.[%3] movprfx\t%0, %4\;\t%0., %1., %2.[%3]" [(set_attr "movprfx" "*,yes")] ) ;; ========================================================================= ;; == Narrowing arithnetic ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- [INT] Narrowing unary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - SQXTNB ;; - SQXTNT ;; - SQXTUNB ;; - SQXTUNT ;; - UQXTNB ;; - UQXTNT ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand: 0 "register_operand" "=w") (unspec: [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")] SVE2_INT_UNARY_NARROWB))] "TARGET_SVE2" "\t%0., %1." ) ;; These instructions do not take MOVPRFX. (define_insn "@aarch64_sve_" [(set (match_operand: 0 "register_operand" "=w") (unspec: [(match_operand: 1 "register_operand" "0") (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")] SVE2_INT_UNARY_NARROWT))] "TARGET_SVE2" "\t%0., %2." ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Narrowing binary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - ADDHNB ;; - ADDHNT ;; - RADDHNB ;; - RADDHNT ;; - RSUBHNB ;; - RSUBHNT ;; - SUBHNB ;; - SUBHNT ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand: 0 "register_operand" "=w") (unspec: [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")] SVE2_INT_BINARY_NARROWB))] "TARGET_SVE2" "\t%0., %1., %2." ) ;; These instructions do not take MOVPRFX. 
(define_insn "@aarch64_sve_" [(set (match_operand: 0 "register_operand" "=w") (unspec: [(match_operand: 1 "register_operand" "0") (match_operand:SVE_FULL_HSDI 2 "register_operand" "w") (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")] SVE2_INT_BINARY_NARROWT))] "TARGET_SVE2" "\t%0., %2., %3." ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Narrowing right shifts ;; ------------------------------------------------------------------------- ;; Includes: ;; - RSHRNB ;; - RSHRNT ;; - SHRNB ;; - SHRNT ;; - SQRSHRNB ;; - SQRSHRNT ;; - SQRSHRUNB ;; - SQRSHRUNT ;; - SQSHRNB ;; - SQSHRNT ;; - SQSHRUNB ;; - SQSHRUNT ;; - UQRSHRNB ;; - UQRSHRNT ;; - UQSHRNB ;; - UQSHRNT ;; ------------------------------------------------------------------------- ;; The immediate range is enforced before generating the instruction. (define_insn "@aarch64_sve_" [(set (match_operand: 0 "register_operand" "=w") (unspec: [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") (match_operand:DI 2 "const_int_operand")] SVE2_INT_SHIFT_IMM_NARROWB))] "TARGET_SVE2" "\t%0., %1., #%2" ) ;; The immediate range is enforced before generating the instruction. ;; These instructions do not take MOVPRFX. (define_insn "@aarch64_sve_" [(set (match_operand: 0 "register_operand" "=w") (unspec: [(match_operand: 1 "register_operand" "0") (match_operand:SVE_FULL_HSDI 2 "register_operand" "w") (match_operand:DI 3 "const_int_operand")] SVE2_INT_SHIFT_IMM_NARROWT))] "TARGET_SVE2" "\t%0., %2., #%3" ) ;; ========================================================================= ;; == Pairwise arithmetic ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- [INT] Pairwise arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - ADDP ;; - SMAXP ;; - SMINP ;; - UMAXP ;; - UMINP ;; ------------------------------------------------------------------------- (define_insn "@aarch64_pred_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand: 1 "register_operand" "Upl, Upl") (match_operand:SVE_FULL_I 2 "register_operand" "0, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] SVE2_INT_BINARY_PAIR))] "TARGET_SVE2" "@ \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [FP] Pairwise arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - FADDP ;; - FMAXP ;; - FMAXNMP ;; - FMINP ;; - FMINNMP ;; ------------------------------------------------------------------------- (define_insn "@aarch64_pred_" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] SVE2_FP_BINARY_PAIR))] "TARGET_SVE2" "@ \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." 
[(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Pairwise arithmetic with accumulation ;; ------------------------------------------------------------------------- ;; Includes: ;; - SADALP ;; - UADALP ;; ------------------------------------------------------------------------- ;; Predicated pairwise absolute difference and accumulate with merging. (define_expand "@cond_" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") (unspec:SVE_FULL_HSDI [(match_operand: 1 "register_operand") (unspec:SVE_FULL_HSDI [(match_dup 1) (match_operand:SVE_FULL_HSDI 2 "register_operand") (match_operand: 3 "register_operand")] SVE2_INT_BINARY_PAIR_LONG) (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2" { /* Only target code is aware of these operations, so we don't need to handle the fully-general case. */ gcc_assert (rtx_equal_p (operands[2], operands[4]) || CONSTANT_P (operands[4])); }) ;; Predicated pairwise absolute difference and accumulate, merging with ;; the first input. (define_insn_and_rewrite "*cond__2" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_HSDI [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_HSDI [(match_operand 4) (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w") (match_operand: 3 "register_operand" "w, w")] SVE2_INT_BINARY_PAIR_LONG) (match_dup 2)] UNSPEC_SEL))] "TARGET_SVE2" "@ \t%0., %1/m, %3. movprfx\t%0, %2\;\t%0., %1/m, %3." "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes")] ) ;; Predicated pairwise absolute difference and accumulate, merging with zero. (define_insn_and_rewrite "*cond__z" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w") (unspec:SVE_FULL_HSDI [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_HSDI [(match_operand 5) (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w") (match_operand: 3 "register_operand" "w, w")] SVE2_INT_BINARY_PAIR_LONG) (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")] UNSPEC_SEL))] "TARGET_SVE2" "@ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %3. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %3." 
"&& !CONSTANT_P (operands[5])" { operands[5] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "yes")] ) ;; ========================================================================= ;; == Complex arithmetic ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- [INT] Complex binary operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - CADD ;; - SQCADD ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") (match_operand:SVE_FULL_I 2 "register_operand" "w, w")] SVE2_INT_CADD))] "TARGET_SVE2" "@ \t%0., %0., %2., # movprfx\t%0, %1\;\t%0., %0., %2., #" [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Complex ternary operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - CMLA ;; - SQRDCMLA ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") (match_operand:SVE_FULL_I 2 "register_operand" "w, w") (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] SVE2_INT_CMLA))] "TARGET_SVE2" "@ \t%0., %2., %3., # movprfx\t%0, %1\;\t%0., %2., %3., #" [(set_attr "movprfx" "*,yes")] ) (define_insn "@aarch64__lane_" [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_HSI [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w") (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w") (unspec:SVE_FULL_HSI [(match_operand:SVE_FULL_HSI 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SVE2_INT_CMLA))] "TARGET_SVE2" "@ \t%0., %2., %3.[%4], # movprfx\t%0, %1\;\t%0., %2., %3.[%4], #" [(set_attr "movprfx" "*,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Complex dot product ;; ------------------------------------------------------------------------- ;; Includes: ;; - CDOT ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_SDI [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w") (match_operand: 2 "register_operand" "w, w") (match_operand: 3 "register_operand" "w, w")] SVE2_INT_CDOT))] "TARGET_SVE2" "@ \t%0., %2., %3., # movprfx\t%0, %1\;\t%0., %2., %3., #" [(set_attr "movprfx" "*,yes")] ) (define_insn "@aarch64__lane_" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_SDI [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w") (match_operand: 2 "register_operand" "w, w") (unspec: [(match_operand: 3 "register_operand" ", ") (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SVE2_INT_CDOT))] "TARGET_SVE2" "@ \t%0., %2., %3.[%4], # movprfx\t%0, %1\;\t%0., %2., %3.[%4], #" [(set_attr "movprfx" "*,yes")] ) ;; ========================================================================= ;; == Conversions ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; 
---- [FP<-FP] Widening conversions ;; ------------------------------------------------------------------------- ;; Includes: ;; - FCVTLT ;; ------------------------------------------------------------------------- ;; Predicated convert long top. (define_insn "@aarch64_pred_" [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") (unspec:SVE_FULL_SDF [(match_operand: 1 "register_operand" "Upl") (match_operand:SI 3 "aarch64_sve_gp_strictness") (match_operand: 2 "register_operand" "0")] SVE2_COND_FP_UNARY_LONG))] "TARGET_SVE2" "\t%0., %1/m, %0." ) ;; Predicated convert long top with merging. (define_expand "@cond_" [(set (match_operand:SVE_FULL_SDF 0 "register_operand") (unspec:SVE_FULL_SDF [(match_operand: 1 "register_operand") (unspec:SVE_FULL_SDF [(match_dup 1) (const_int SVE_STRICT_GP) (match_operand: 2 "register_operand")] SVE2_COND_FP_UNARY_LONG) (match_operand:SVE_FULL_SDF 3 "register_operand")] UNSPEC_SEL))] "TARGET_SVE2" ) ;; These instructions do not take MOVPRFX. (define_insn_and_rewrite "*cond__relaxed" [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") (unspec:SVE_FULL_SDF [(match_operand: 1 "register_operand" "Upl") (unspec:SVE_FULL_SDF [(match_operand 4) (const_int SVE_RELAXED_GP) (match_operand: 2 "register_operand" "w")] SVE2_COND_FP_UNARY_LONG) (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] UNSPEC_SEL))] "TARGET_SVE2" "\t%0., %1/m, %2." "&& !rtx_equal_p (operands[1], operands[4])" { operands[4] = copy_rtx (operands[1]); } ) (define_insn "*cond__strict" [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") (unspec:SVE_FULL_SDF [(match_operand: 1 "register_operand" "Upl") (unspec:SVE_FULL_SDF [(match_dup 1) (const_int SVE_STRICT_GP) (match_operand: 2 "register_operand" "w")] SVE2_COND_FP_UNARY_LONG) (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] UNSPEC_SEL))] "TARGET_SVE2" "\t%0., %1/m, %2." ) ;; ------------------------------------------------------------------------- ;; ---- [FP<-FP] Narrowing conversions ;; ------------------------------------------------------------------------- ;; Includes: ;; - FCVTNT ;; - FCVTX ;; - FCVTXNT ;; ------------------------------------------------------------------------- ;; Predicated FCVTNT. This doesn't give a natural aarch64_pred_*/cond_* ;; pair because the even elements always have to be supplied for active ;; elements, even if the inactive elements don't matter. ;; ;; These instructions do not take MOVPRFX. (define_insn "@aarch64_sve_cvtnt" [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w") (unspec:SVE_FULL_HSF [(match_operand: 2 "register_operand" "Upl") (const_int SVE_STRICT_GP) (match_operand:SVE_FULL_HSF 1 "register_operand" "0") (match_operand: 3 "register_operand" "w")] UNSPEC_COND_FCVTNT))] "TARGET_SVE2" "fcvtnt\t%0., %2/m, %3." ) ;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that ;; it supports MOVPRFX). (define_insn "@aarch64_pred_" [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w") (unspec:VNx4SF_ONLY [(match_operand: 1 "register_operand" "Upl, Upl") (match_operand:SI 3 "aarch64_sve_gp_strictness") (match_operand: 2 "register_operand" "0, w")] SVE2_COND_FP_UNARY_NARROWB))] "TARGET_SVE2" "@ \t%0., %1/m, %2. movprfx\t%0, %2\;\t%0., %1/m, %2." [(set_attr "movprfx" "*,yes")] ) ;; Predicated FCVTX with merging. 
(define_expand "@cond_" [(set (match_operand:VNx4SF_ONLY 0 "register_operand") (unspec:VNx4SF_ONLY [(match_operand: 1 "register_operand") (unspec:VNx4SF_ONLY [(match_dup 1) (const_int SVE_STRICT_GP) (match_operand: 2 "register_operand")] SVE2_COND_FP_UNARY_NARROWB) (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2" ) (define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w") (unspec:VNx4SF_ONLY [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:VNx4SF_ONLY [(match_operand 4) (const_int SVE_RELAXED_GP) (match_operand: 2 "register_operand" "w, w, w")] SVE2_COND_FP_UNARY_NARROWB) (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. movprfx\t%0, %3\;\t%0., %1/m, %2." "&& !rtx_equal_p (operands[1], operands[4])" { operands[4] = copy_rtx (operands[1]); } [(set_attr "movprfx" "*,yes,yes")] ) (define_insn "*cond__any_strict" [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w") (unspec:VNx4SF_ONLY [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:VNx4SF_ONLY [(match_dup 1) (const_int SVE_STRICT_GP) (match_operand: 2 "register_operand" "w, w, w")] SVE2_COND_FP_UNARY_NARROWB) (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. movprfx\t%0, %3\;\t%0., %1/m, %2." [(set_attr "movprfx" "*,yes,yes")] ) ;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_* ;; pair because the even elements always have to be supplied for active ;; elements, even if the inactive elements don't matter. ;; ;; These instructions do not take MOVPRFX. (define_insn "@aarch64_sve2_cvtxnt" [(set (match_operand: 0 "register_operand" "=w") (unspec: [(match_operand: 2 "register_operand" "Upl") (const_int SVE_STRICT_GP) (match_operand: 1 "register_operand" "0") (match_operand:VNx2DF_ONLY 3 "register_operand" "w")] UNSPEC_COND_FCVTXNT))] "TARGET_SVE2" "fcvtxnt\t%0., %2/m, %3." ) ;; ========================================================================= ;; == Other arithmetic ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- [INT] Reciprocal approximation ;; ------------------------------------------------------------------------- ;; Includes: ;; - URECPE ;; - URSQRTE ;; ------------------------------------------------------------------------- ;; Predicated integer unary operations. (define_insn "@aarch64_pred_" [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") (unspec:VNx4SI_ONLY [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:VNx4SI_ONLY [(match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")] SVE2_U32_UNARY)] UNSPEC_PRED_X))] "TARGET_SVE2" "@ \t%0., %1/m, %2. movprfx\t%0, %2\;\t%0., %1/m, %2." [(set_attr "movprfx" "*,yes")] ) ;; Predicated integer unary operations with merging. 
(define_expand "@cond_" [(set (match_operand:VNx4SI_ONLY 0 "register_operand") (unspec:VNx4SI_ONLY [(match_operand: 1 "register_operand") (unspec:VNx4SI_ONLY [(match_dup 4) (unspec:VNx4SI_ONLY [(match_operand:VNx4SI_ONLY 2 "register_operand")] SVE2_U32_UNARY)] UNSPEC_PRED_X) (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2" { operands[4] = CONSTM1_RTX (mode); } ) (define_insn_and_rewrite "*cond_" [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w") (unspec:VNx4SI_ONLY [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:VNx4SI_ONLY [(match_operand 4) (unspec:VNx4SI_ONLY [(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")] SVE2_U32_UNARY)] UNSPEC_PRED_X) (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] "TARGET_SVE2" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. movprfx\t%0, %3\;\t%0., %1/m, %2." "&& !CONSTANT_P (operands[4])" { operands[4] = CONSTM1_RTX (mode); } [(set_attr "movprfx" "*,yes,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT<-FP] Base-2 logarithm ;; ------------------------------------------------------------------------- ;; Includes: ;; - FLOGB ;; ------------------------------------------------------------------------- ;; Predicated FLOGB. (define_insn "@aarch64_pred_" [(set (match_operand: 0 "register_operand" "=w, ?&w") (unspec: [(match_operand: 1 "register_operand" "Upl, Upl") (match_operand:SI 3 "aarch64_sve_gp_strictness") (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] SVE2_COND_INT_UNARY_FP))] "TARGET_SVE2" "@ \t%0., %1/m, %2. movprfx\t%0, %2\;\t%0., %1/m, %2." [(set_attr "movprfx" "*,yes")] ) ;; Predicated FLOGB with merging. (define_expand "@cond_" [(set (match_operand: 0 "register_operand") (unspec: [(match_operand: 1 "register_operand") (unspec: [(match_dup 1) (const_int SVE_STRICT_GP) (match_operand:SVE_FULL_F 2 "register_operand")] SVE2_COND_INT_UNARY_FP) (match_operand: 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2" ) (define_insn_and_rewrite "*cond_" [(set (match_operand: 0 "register_operand" "=&w, ?&w, ?&w") (unspec: [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec: [(match_operand 4) (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE2_COND_INT_UNARY_FP) (match_operand: 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. movprfx\t%0, %3\;\t%0., %1/m, %2." "&& !rtx_equal_p (operands[1], operands[4])" { operands[4] = copy_rtx (operands[1]); } [(set_attr "movprfx" "*,yes,yes")] ) (define_insn "*cond__strict" [(set (match_operand: 0 "register_operand" "=&w, ?&w, ?&w") (unspec: [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec: [(match_dup 1) (const_int SVE_STRICT_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE2_COND_INT_UNARY_FP) (match_operand: 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. movprfx\t%0, %3\;\t%0., %1/m, %2." 
[(set_attr "movprfx" "*,yes,yes")] ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Polynomial multiplication ;; ------------------------------------------------------------------------- ;; Includes: ;; - PMUL ;; - PMULLB ;; - PMULLT ;; ------------------------------------------------------------------------- ;; Uniform PMUL. (define_insn "@aarch64_sve2_pmul" [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w") (unspec:VNx16QI_ONLY [(match_operand:VNx16QI_ONLY 1 "register_operand" "w") (match_operand:VNx16QI_ONLY 2 "register_operand" "w")] UNSPEC_PMUL))] "TARGET_SVE2" "pmul\t%0., %1., %2." ) ;; Extending PMUL, with the results modeled as wider vectors. ;; This representation is only possible for .H and .D, not .Q. (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w") (unspec:SVE_FULL_HDI [(match_operand: 1 "register_operand" "w") (match_operand: 2 "register_operand" "w")] SVE2_PMULL))] "TARGET_SVE2" "\t%0., %1., %2." ) ;; Extending PMUL, with the results modeled as pairs of values. ;; This representation works for .H, .D and .Q, with .Q requiring ;; the AES extension. (This is enforced by the mode iterator.) (define_insn "@aarch64_sve_" [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w") (unspec:SVE2_PMULL_PAIR_I [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w") (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")] SVE2_PMULL_PAIR))] "TARGET_SVE2" "\t%0., %1., %2." ) ;; ========================================================================= ;; == Permutation ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- [INT,FP] General permutes ;; ------------------------------------------------------------------------- ;; Includes: ;; - TBL (vector pair form) ;; - TBX ;; ------------------------------------------------------------------------- ;; TBL on a pair of data vectors. (define_insn "@aarch64_sve2_tbl2" [(set (match_operand:SVE_FULL 0 "register_operand" "=w") (unspec:SVE_FULL [(match_operand: 1 "register_operand" "w") (match_operand: 2 "register_operand" "w")] UNSPEC_TBL2))] "TARGET_SVE2" "tbl\t%0., %1, %2." ) ;; TBX. These instructions do not take MOVPRFX. (define_insn "@aarch64_sve2_tbx" [(set (match_operand:SVE_FULL 0 "register_operand" "=w") (unspec:SVE_FULL [(match_operand:SVE_FULL 1 "register_operand" "0") (match_operand:SVE_FULL 2 "register_operand" "w") (match_operand: 3 "register_operand" "w")] UNSPEC_TBX))] "TARGET_SVE2" "tbx\t%0., %2., %3." ) ;; ------------------------------------------------------------------------- ;; ---- [INT] Optional bit-permute extensions ;; ------------------------------------------------------------------------- ;; Includes: ;; - BDEP ;; - BEXT ;; - BGRP ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") (unspec:SVE_FULL_I [(match_operand:SVE_FULL_I 1 "register_operand" "w") (match_operand:SVE_FULL_I 2 "register_operand" "w")] SVE2_INT_BITPERM))] "TARGET_SVE2_BITPERM" "\t%0., %1., %2." 
) ;; ========================================================================= ;; == General ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- Check for aliases between pointers ;; ------------------------------------------------------------------------- ;; The patterns in this section are synthetic: WHILERW and WHILEWR are ;; defined in aarch64-sve.md instead. ;; ------------------------------------------------------------------------- ;; Use WHILERW and WHILEWR to accelerate alias checks. This is only ;; possible if the accesses we're checking are exactly the same size ;; as an SVE vector. (define_expand "check__ptrs" [(match_operand:GPI 0 "register_operand") (unspec:VNx16BI [(match_operand:GPI 1 "register_operand") (match_operand:GPI 2 "register_operand") (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand") (match_operand:GPI 4 "const_int_operand")] SVE2_WHILE_PTR)] "TARGET_SVE2" { /* Use the widest predicate mode we can. */ unsigned int align = INTVAL (operands[4]); if (align > 8) align = 8; machine_mode pred_mode = aarch64_sve_pred_mode (align).require (); /* Emit a WHILERW or WHILEWR, setting the condition codes based on the result. */ emit_insn (gen_while_ptest (, mode, pred_mode, gen_rtx_SCRATCH (pred_mode), operands[1], operands[2], CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode))); /* Set operand 0 to true if the last bit of the predicate result is set, i.e. if all elements are free of dependencies. */ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); rtx cmp = gen_rtx_LTU (mode, cc_reg, const0_rtx); emit_insn (gen_aarch64_cstore (operands[0], cmp, cc_reg)); DONE; }) ;; ------------------------------------------------------------------------- ;; ---- Histogram processing ;; ------------------------------------------------------------------------- ;; Includes: ;; - HISTCNT ;; - HISTSEG ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve2_histcnt" [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") (unspec:SVE_FULL_SDI [(match_operand: 1 "register_operand" "Upl") (match_operand:SVE_FULL_SDI 2 "register_operand" "w") (match_operand:SVE_FULL_SDI 3 "register_operand" "w")] UNSPEC_HISTCNT))] "TARGET_SVE2" "histcnt\t%0., %1/z, %2., %3." ) (define_insn "@aarch64_sve2_histseg" [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w") (unspec:VNx16QI_ONLY [(match_operand:VNx16QI_ONLY 1 "register_operand" "w") (match_operand:VNx16QI_ONLY 2 "register_operand" "w")] UNSPEC_HISTSEG))] "TARGET_SVE2" "histseg\t%0., %1., %2." ) ;; ------------------------------------------------------------------------- ;; ---- String matching ;; ------------------------------------------------------------------------- ;; Includes: ;; - MATCH ;; - NMATCH ;; ------------------------------------------------------------------------- ;; Predicated string matching. (define_insn "@aarch64_pred_" [(set (match_operand: 0 "register_operand" "=Upa") (unspec: [(match_operand: 1 "register_operand" "Upl") (match_operand:SI 2 "aarch64_sve_ptrue_flag") (unspec: [(match_operand:SVE_FULL_BHI 3 "register_operand" "w") (match_operand:SVE_FULL_BHI 4 "register_operand" "w")] SVE2_MATCH)] UNSPEC_PRED_Z)) (clobber (reg:CC_NZC CC_REGNUM))] "TARGET_SVE2" "\t%0., %1/z, %3., %4." ) ;; Predicated string matching in which both the flag and predicate results ;; are interesting. 
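;; For instance, source along the lines of the sketch below wants both the
;; per-lane MATCH result and a scalar "was there any match" test, which is
;; what the *_cc pattern that follows matches without a separate PTEST.
;; The intrinsic spellings (svmatch_u8, svptest_any from arm_sve.h) are
;; assumptions used only for illustration:
;;
;;   #include <arm_sve.h>
;;
;;   /* Return the per-lane matches of NEEDLES in HAYSTACK and record
;;      whether there was at least one match.  */
;;   svbool_t
;;   match_any (svbool_t pg, svuint8_t haystack, svuint8_t needles, int *any)
;;   {
;;     svbool_t res = svmatch_u8 (pg, haystack, needles);
;;     *any = svptest_any (pg, res);
;;     return res;
;;   }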
(define_insn_and_rewrite "*aarch64_pred__cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand" "Upl") (match_operand 4) (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec: [(match_operand 6) (match_operand:SI 7 "aarch64_sve_ptrue_flag") (unspec: [(match_operand:SVE_FULL_BHI 2 "register_operand" "w") (match_operand:SVE_FULL_BHI 3 "register_operand" "w")] SVE2_MATCH)] UNSPEC_PRED_Z)] UNSPEC_PTEST)) (set (match_operand: 0 "register_operand" "=Upa") (unspec: [(match_dup 6) (match_dup 7) (unspec: [(match_dup 2) (match_dup 3)] SVE2_MATCH)] UNSPEC_PRED_Z))] "TARGET_SVE2 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" "\t%0., %1/z, %2., %3." "&& !rtx_equal_p (operands[4], operands[6])" { operands[6] = copy_rtx (operands[4]); operands[7] = operands[5]; } ) ;; Predicated string matching in which only the flags result is interesting. (define_insn_and_rewrite "*aarch64_pred__ptest" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand" "Upl") (match_operand 4) (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec: [(match_operand 6) (match_operand:SI 7 "aarch64_sve_ptrue_flag") (unspec: [(match_operand:SVE_FULL_BHI 2 "register_operand" "w") (match_operand:SVE_FULL_BHI 3 "register_operand" "w")] SVE2_MATCH)] UNSPEC_PRED_Z)] UNSPEC_PTEST)) (clobber (match_scratch: 0 "=Upa"))] "TARGET_SVE2 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" "\t%0., %1/z, %2., %3." "&& !rtx_equal_p (operands[4], operands[6])" { operands[6] = copy_rtx (operands[4]); operands[7] = operands[5]; } ) ;; ========================================================================= ;; == Crypotographic extensions ;; ========================================================================= ;; ------------------------------------------------------------------------- ;; ---- Optional AES extensions ;; ------------------------------------------------------------------------- ;; Includes: ;; - AESD ;; - AESE ;; - AESIMC ;; - AESMC ;; ------------------------------------------------------------------------- ;; AESD and AESE. (define_insn "aarch64_sve2_aes" [(set (match_operand:VNx16QI 0 "register_operand" "=w") (unspec:VNx16QI [(xor:VNx16QI (match_operand:VNx16QI 1 "register_operand" "%0") (match_operand:VNx16QI 2 "register_operand" "w"))] CRYPTO_AES))] "TARGET_SVE2_AES" "aes\t%0.b, %0.b, %2.b" [(set_attr "type" "crypto_aese")] ) ;; AESMC and AESIMC. These instructions do not take MOVPRFX. (define_insn "aarch64_sve2_aes" [(set (match_operand:VNx16QI 0 "register_operand" "=w") (unspec:VNx16QI [(match_operand:VNx16QI 1 "register_operand" "0")] CRYPTO_AESMC))] "TARGET_SVE2_AES" "aes\t%0.b, %0.b" [(set_attr "type" "crypto_aesmc")] ) ;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want ;; to keep the two together and enforce the register dependency without ;; scheduling or register allocation messing up the order or introducing ;; moves inbetween. Mash the two together during combine. 
(define_insn "*aarch64_sve2_aese_fused" [(set (match_operand:VNx16QI 0 "register_operand" "=w") (unspec:VNx16QI [(unspec:VNx16QI [(xor:VNx16QI (match_operand:VNx16QI 1 "register_operand" "%0") (match_operand:VNx16QI 2 "register_operand" "w"))] UNSPEC_AESE)] UNSPEC_AESMC))] "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b" [(set_attr "type" "crypto_aese") (set_attr "length" "8")] ) (define_insn "*aarch64_sve2_aesd_fused" [(set (match_operand:VNx16QI 0 "register_operand" "=w") (unspec:VNx16QI [(unspec:VNx16QI [(xor:VNx16QI (match_operand:VNx16QI 1 "register_operand" "%0") (match_operand:VNx16QI 2 "register_operand" "w"))] UNSPEC_AESD)] UNSPEC_AESIMC))] "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b" [(set_attr "type" "crypto_aese") (set_attr "length" "8")] ) ;; ------------------------------------------------------------------------- ;; ---- Optional SHA-3 extensions ;; ------------------------------------------------------------------------- ;; Includes: ;; - RAX1 ;; ------------------------------------------------------------------------- (define_insn "aarch64_sve2_rax1" [(set (match_operand:VNx2DI 0 "register_operand" "=w") (xor:VNx2DI (rotate:VNx2DI (match_operand:VNx2DI 2 "register_operand" "w") (const_int 1)) (match_operand:VNx2DI 1 "register_operand" "w")))] "TARGET_SVE2_SHA3" "rax1\t%0.d, %1.d, %2.d" [(set_attr "type" "crypto_sha3")] ) ;; ------------------------------------------------------------------------- ;; ---- Optional SM4 extensions ;; ------------------------------------------------------------------------- ;; Includes: ;; - SM4E ;; - SM4EKEY ;; ------------------------------------------------------------------------- ;; These instructions do not take MOVPRFX. (define_insn "aarch64_sve2_sm4e" [(set (match_operand:VNx4SI 0 "register_operand" "=w") (unspec:VNx4SI [(match_operand:VNx4SI 1 "register_operand" "0") (match_operand:VNx4SI 2 "register_operand" "w")] UNSPEC_SM4E))] "TARGET_SVE2_SM4" "sm4e\t%0.s, %0.s, %2.s" [(set_attr "type" "crypto_sm4")] ) (define_insn "aarch64_sve2_sm4ekey" [(set (match_operand:VNx4SI 0 "register_operand" "=w") (unspec:VNx4SI [(match_operand:VNx4SI 1 "register_operand" "w") (match_operand:VNx4SI 2 "register_operand" "w")] UNSPEC_SM4EKEY))] "TARGET_SVE2_SM4" "sm4ekey\t%0.s, %1.s, %2.s" [(set_attr "type" "crypto_sm4")] )