;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.
;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; {{{ Vector iterators
; Vector modes for specific types
; (This will make more sense when there are multiple vector sizes)
(define_mode_iterator V_QI
[V64QI])
(define_mode_iterator V_HI
[V64HI])
(define_mode_iterator V_HF
[V64HF])
(define_mode_iterator V_SI
[V64SI])
(define_mode_iterator V_SF
[V64SF])
(define_mode_iterator V_DI
[V64DI])
(define_mode_iterator V_DF
[V64DF])
; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI
[V64QI V64HI])
; Vector modes for one vector register
(define_mode_iterator V_1REG
[V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator V_INT_1REG
[V64QI V64HI V64SI])
(define_mode_iterator V_INT_1REG_ALT
[V64QI V64HI V64SI])
(define_mode_iterator V_FP_1REG
[V64HF V64SF])
; Vector modes for two vector registers
(define_mode_iterator V_2REG
[V64DI V64DF])
; Vector modes with native support
(define_mode_iterator V_noQI
[V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_noHI
[V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_INT_noQI
[V64HI V64SI V64DI])
; All of the above
(define_mode_iterator V_ALL
[V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_ALL_ALT
[V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_INT
[V64QI V64HI V64SI V64DI])
(define_mode_iterator V_FP
[V64HF V64SF V64DF])
(define_mode_attr scalar_mode
[(V64QI "qi") (V64HI "hi") (V64SI "si")
(V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
(define_mode_attr SCALAR_MODE
[(V64QI "QI") (V64HI "HI") (V64SI "SI")
(V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
(define_mode_attr vnsi
[(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
(V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
(define_mode_attr VnSI
[(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
(V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
(define_mode_attr vndi
[(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
(V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
(define_mode_attr VnDI
[(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
(V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
;; }}}
;; {{{ Substitutions
(define_subst_attr "exec" "vec_merge"
"" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
"" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
"" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
"" "_exec")
(define_subst "vec_merge"
[(set (match_operand:V_ALL 0)
(match_operand:V_ALL 1))]
""
[(set (match_dup 0)
(vec_merge:V_ALL
(match_dup 1)
(match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
(define_subst "vec_merge_with_clobber"
[(set (match_operand:V_ALL 0)
(match_operand:V_ALL 1))
(clobber (match_operand 2))]
""
[(set (match_dup 0)
(vec_merge:V_ALL
(match_dup 1)
(match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 4 "gcn_exec_reg_operand" "e")))
(clobber (match_dup 2))])
(define_subst "vec_merge_with_vcc"
[(set (match_operand:V_ALL 0)
(match_operand:V_ALL 1))
(set (match_operand:DI 2)
(match_operand:DI 3))]
""
[(parallel
[(set (match_dup 0)
(vec_merge:V_ALL
(match_dup 1)
(match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 5 "gcn_exec_reg_operand" "e")))
(set (match_dup 2)
(and:DI (match_dup 3)
(reg:DI EXEC_REG)))])])
(define_subst "scatter_store"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand 0)
(match_operand 1)
(match_operand 2)
(match_operand 3)]
UNSPEC_SCATTER))]
""
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_dup 0)
(match_dup 1)
(match_dup 2)
(match_dup 3)
(match_operand:DI 4 "gcn_exec_reg_operand" "e")]
UNSPEC_SCATTER))])
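; Each define_subst above appends "_exec" to a pattern name and wraps the
; result in a vec_merge controlled by the EXEC register.  As an illustrative
; (hypothetical) example, a pattern written as "foo<mode>3<exec>" is
; generated twice: once as "foo<mode>3" and once as "foo<mode>3_exec",
; where the latter gains two extra operands: the previous value to keep in
; inactive lanes ("U0") and the DI exec mask ("e").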
;; }}}
;; {{{ Vector moves
; This is the entry point for all vector register moves. Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.
(define_expand "mov"
[(set (match_operand:V_ALL 0 "nonimmediate_operand")
(match_operand:V_ALL 1 "general_operand"))]
""
{
if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
{
	operands[1] = force_reg (<MODE>mode, operands[1]);
	rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[0],
							scratch);
	emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
DONE;
}
else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
{
	rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
operands[1],
scratch);
	emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
DONE;
}
else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
{
gcc_assert (!reload_completed);
	rtx scratch = gen_reg_rtx (<VnDI>mode);
	emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
DONE;
}
})
; A pseudo instruction that helps LRA use the "U0" constraint.
(define_insn "mov_unspec"
[(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
(match_operand:V_ALL 1 "gcn_unspec_operand" " U"))]
""
""
[(set_attr "type" "unknown")
(set_attr "length" "0")])
(define_insn "*mov"
[(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
(match_operand:V_1REG 1 "general_operand" "vA,B"))]
""
"v_mov_b32\t%0, %1"
[(set_attr "type" "vop1,vop1")
(set_attr "length" "4,8")])
(define_insn "mov_exec"
[(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
(vec_merge:V_1REG
(match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v")
(match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
"U0,U0,vA,vA,U0,U0")
(match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))]
"!MEM_P (operands[0]) || REG_P (operands[1])"
"@
v_mov_b32\t%0, %1
v_mov_b32\t%0, %1
v_cndmask_b32\t%0, %2, %1, vcc
v_cndmask_b32\t%0, %2, %1, %3
#
#"
[(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
(set_attr "length" "4,8,4,8,16,16")])
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.
;(define_insn "*mov_exec_match"
; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
; (vec_merge:V_1REG
; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
; (match_dup 0)
; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
;   (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
; "!MEM_P (operands[0]) || REG_P (operands[1])"
; "@
; v_mov_b32\t%0, %1
; v_mov_b32\t%0, %1
; #
; #"
; [(set_attr "type" "vop1,vop1,*,*")
; (set_attr "length" "4,8,16,16")])
(define_insn "*mov"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
(match_operand:V_2REG 1 "general_operand" "vDB"))]
""
{
if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
else
return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
}
[(set_attr "type" "vmult")
(set_attr "length" "16")])
(define_insn "mov_exec"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
(vec_merge:V_2REG
(match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
(match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
" U0,vDA0,vDA0,U0,U0")
(match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
"!MEM_P (operands[0]) || REG_P (operands[1])"
{
if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
switch (which_alternative)
{
case 0:
return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
case 1:
return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
"v_cndmask_b32\t%H0, %H2, %H1, vcc";
case 2:
return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
"v_cndmask_b32\t%H0, %H2, %H1, %3";
}
else
switch (which_alternative)
{
case 0:
return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
case 1:
return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
"v_cndmask_b32\t%L0, %L2, %L1, vcc";
case 2:
return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
"v_cndmask_b32\t%L0, %L2, %L1, %3";
}
return "#";
}
[(set_attr "type" "vmult,vmult,vmult,*,*")
(set_attr "length" "16,16,16,16,16")])
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.
;(define_insn "*mov_exec_match"
; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
; (vec_merge:V_2REG
; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
; (match_dup 0)
; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
;   (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
; "!MEM_P (operands[0]) || REG_P (operands[1])"
; "@
; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
; else \
; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
; #
; #"
; [(set_attr "type" "vmult,*,*")
; (set_attr "length" "16,16,16")])
; A SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
; vT = v1 << log2(element-size)
; vT += Sv
; flat_load v, vT
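; For example (schematically, assuming a V64SI access, so the element size
; is 4 bytes and log2(element-size) is 2):
;   vT = v1 << 2     ; per-lane byte offsets
;   vT += Sv         ; add the scalar base address
;   flat_load v, vT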
(define_insn "mov_sgprbase"
[(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
(unspec:V_1REG
[(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
"lra_in_progress || reload_completed"
"@
v_mov_b32\t%0, %1
v_mov_b32\t%0, %1
#
#"
[(set_attr "type" "vop1,vop1,*,*")
(set_attr "length" "4,8,12,12")])
(define_insn "mov_sgprbase"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
(unspec:V_2REG
[(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
"lra_in_progress || reload_completed"
"@
* if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
else \
return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
#
#"
[(set_attr "type" "vmult,*,*")
(set_attr "length" "8,12,12")])
; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload. It allows a reload with a scratch register.
(define_expand "reload_in"
[(set (match_operand:V_ALL 0 "register_operand" "= v")
(match_operand:V_ALL 1 "memory_operand" " m"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
""
{
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
DONE;
})
; reload_out is similar to reload_in, above.
(define_expand "reload_out"
[(set (match_operand:V_ALL 0 "memory_operand" "= m")
(match_operand:V_ALL 1 "register_operand" " v"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
""
{
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
DONE;
})
; Expand scalar addresses into gather/scatter patterns
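; There are four splits below: stores and loads, each with and without an
; exec (vec_merge) mask.  Each converts the UNSPEC_SGPRBASE or vec_merge
; form into the UNSPEC_SCATTER or UNSPEC_GATHER form, using
; gcn_expand_scalar_to_vector_address to build the vector of lane addresses.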
(define_split
[(set (match_operand:V_ALL 0 "memory_operand")
(unspec:V_ALL
[(match_operand:V_ALL 1 "general_operand")]
UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
""
[(set (mem:BLK (scratch))
(unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
UNSPEC_SCATTER))]
{
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
operands[0],
operands[2]);
operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
})
(define_split
[(set (match_operand:V_ALL 0 "memory_operand")
(vec_merge:V_ALL
(match_operand:V_ALL 1 "general_operand")
(match_operand:V_ALL 2 "")
(match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
""
[(set (mem:BLK (scratch))
(unspec:BLK [(match_dup 5) (match_dup 1)
(match_dup 6) (match_dup 7) (match_dup 3)]
UNSPEC_SCATTER))]
{
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
operands[3],
operands[0],
operands[4]);
operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
})
(define_split
[(set (match_operand:V_ALL 0 "nonimmediate_operand")
(unspec:V_ALL
[(match_operand:V_ALL 1 "memory_operand")]
UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
""
[(set (match_dup 0)
(unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
(mem:BLK (scratch))]
UNSPEC_GATHER))]
{
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
operands[1],
operands[2]);
operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
})
(define_split
[(set (match_operand:V_ALL 0 "nonimmediate_operand")
(vec_merge:V_ALL
(match_operand:V_ALL 1 "memory_operand")
(match_operand:V_ALL 2 "")
(match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
""
[(set (match_dup 0)
(vec_merge:V_ALL
(unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
(mem:BLK (scratch))]
UNSPEC_GATHER)
(match_dup 2)
(match_dup 3)))]
{
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
operands[3],
operands[1],
operands[4]);
operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
})
; TODO: Add zero/sign extending variants.
;; }}}
;; {{{ Lane moves
; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates
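; The vec_merge mask (ashift (const_int 1) lane) has exactly one bit set, so
; only the lane numbered by operand 2 is written; all other lanes keep the
; value of operand 3.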
(define_insn "*vec_set"
[(set (match_operand:V_1REG 0 "register_operand" "= v")
(vec_merge:V_1REG
(vec_duplicate:V_1REG
(match_operand: 1 "register_operand" " Sv"))
(match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
(ashift (const_int 1)
(match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
""
"v_writelane_b32 %0, %1, %2"
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set"
[(set (match_operand:V_2REG 0 "register_operand" "= v")
(vec_merge:V_2REG
(vec_duplicate:V_2REG
(match_operand: 1 "register_operand" " Sv"))
(match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
(ashift (const_int 1)
(match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
""
"v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_expand "vec_set"
[(set (match_operand:V_ALL 0 "register_operand")
(vec_merge:V_ALL
(vec_duplicate:V_ALL
(match_operand: 1 "register_operand"))
(match_dup 0)
(ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
"")
(define_insn "*vec_set_1"
[(set (match_operand:V_1REG 0 "register_operand" "=v")
(vec_merge:V_1REG
(vec_duplicate:V_1REG
(match_operand: 1 "register_operand" "Sv"))
(match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:SI 2 "const_int_operand" " i")))]
"((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (mode))"
{
operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
return "v_writelane_b32 %0, %1, %2";
}
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_insn "*vec_set_1"
[(set (match_operand:V_2REG 0 "register_operand" "=v")
(vec_merge:V_2REG
(vec_duplicate:V_2REG
(match_operand: 1 "register_operand" "Sv"))
(match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:SI 2 "const_int_operand" " i")))]
"((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (mode))"
{
operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
}
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_insn "vec_duplicate"
[(set (match_operand:V_1REG 0 "register_operand" "=v")
(vec_duplicate:V_1REG
(match_operand: 1 "gcn_alu_operand" "SvB")))]
""
"v_mov_b32\t%0, %1"
[(set_attr "type" "vop3a")
(set_attr "length" "8")])
(define_insn "vec_duplicate"
[(set (match_operand:V_2REG 0 "register_operand" "= v")
(vec_duplicate:V_2REG
(match_operand: 1 "gcn_alu_operand" "SvDB")))]
""
"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
[(set_attr "type" "vop3a")
(set_attr "length" "16")])
(define_insn "vec_extract"
[(set (match_operand: 0 "register_operand" "=Sg")
(vec_select:
(match_operand:V_1REG 1 "register_operand" " v")
(parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
""
"v_readlane_b32 %0, %1, %2"
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_insn "vec_extract"
[(set (match_operand: 0 "register_operand" "=&Sg")
(vec_select:
(match_operand:V_2REG 1 "register_operand" " v")
(parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
""
"v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_expand "extract_last_"
[(match_operand: 0 "register_operand")
(match_operand:DI 1 "gcn_alu_operand")
(match_operand:V_ALL 2 "register_operand")]
"can_create_pseudo_p ()"
{
rtx dst = operands[0];
rtx mask = operands[1];
rtx vect = operands[2];
rtx tmpreg = gen_reg_rtx (SImode);
emit_insn (gen_clzdi2 (tmpreg, mask));
emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
    emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
DONE;
})
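; For example, with mask = 0xf0 the count of leading zeros is 56, and
; 63 - 56 = 7, so lane 7 (the last active lane) is extracted.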
(define_expand "fold_extract_last_"
[(match_operand: 0 "register_operand")
(match_operand: 1 "gcn_alu_operand")
(match_operand:DI 2 "gcn_alu_operand")
(match_operand:V_ALL 3 "register_operand")]
"can_create_pseudo_p ()"
{
rtx dst = operands[0];
rtx default_value = operands[1];
rtx mask = operands[2];
rtx vect = operands[3];
rtx else_label = gen_label_rtx ();
rtx end_label = gen_label_rtx ();
rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
    emit_insn (gen_extract_last_<mode> (dst, mask, vect));
emit_jump_insn (gen_jump (end_label));
emit_barrier ();
emit_label (else_label);
emit_move_insn (dst, default_value);
emit_label (end_label);
DONE;
})
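; In effect: dst = (mask != 0) ? vect[last active lane] : default_value.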
(define_expand "vec_init"
[(match_operand:V_ALL 0 "register_operand")
(match_operand 1)]
""
{
gcn_expand_vector_init (operands[0], operands[1]);
DONE;
})
;; }}}
;; {{{ Scatter / Gather
;; GCN does not have an instruction for loading a vector from contiguous
;; memory so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec. The unspec formats are as follows:
;;
;;     (unspec:V??
;;       [(<address expression>)
;;        (<addr_space_t>)
;;        (<volatile flag>)
;;        (mem:BLK (scratch))]
;;       UNSPEC_GATHER)
;;
;;     (unspec:BLK
;;       [(<address expression>)
;;        (<source register>)