gdb: RISC-V: Refine lr/sc sequence support
Checks

| Context                                            | Check   | Description    |
|----------------------------------------------------|---------|----------------|
| linaro-tcwg-bot/tcwg_gdb_build--master-aarch64     | success | Testing passed |
| linaro-tcwg-bot/tcwg_gdb_check--master-aarch64     | success | Testing passed |
| linaro-tcwg-bot/tcwg_gdb_build--master-arm         | success | Testing passed |
| linaro-tcwg-bot/tcwg_gdb_check--master-arm         | success | Testing passed |
Commit Message
Per RISC-V spec, the lr/sc sequence can consist of up to 16 instructions, and we
cannot insert breakpoints in the middle of this sequence. Before this, we only
detected a specific pattern (the most common one). This patch improves this part
and now supports more complex pattern detection.
gdb/ChangeLog:
* gdb/riscv-tdep.c (class riscv_insn): Add more needed opcode enums.
(riscv_insn::decode): Decode newly added opcodes.
(riscv_insn_is_non_cti_and_allowed_in_atomic_sequence): New.
(riscv_insn_is_direct_branch): New.
(riscv_next_pc_atomic_sequence): Removed.
(riscv_deal_with_atomic_sequence): Rename from riscv_next_pc_atomic_sequence.
(riscv_software_single_step): Adjust to use the renamed one.
Signed-off-by: Yang Liu <liuyang22@iscas.ac.cn>
---
gdb/riscv-tdep.c | 266 ++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 226 insertions(+), 40 deletions(-)
Comments
On 2023/11/2 22:03, Yang Liu wrote:
> Per RISC-V spec, the lr/sc sequence can consist of up to 16 instructions, and we
> cannot insert breakpoints in the middle of this sequence. Before this, we only
> detected a specific pattern (the most common one). This patch improves this part
> and now supports more complex pattern detection.
>
> gdb/ChangeLog:
>
> * gdb/riscv-tdep.c (class riscv_insn): Add more needed opcode enums.
> (riscv_insn::decode): Decode newly added opcodes.
> (riscv_insn_is_non_cti_and_allowed_in_atomic_sequence): New.
> (riscv_insn_is_direct_branch): New.
> (riscv_next_pc_atomic_sequence): Removed.
> (riscv_deal_with_atomic_sequence): Rename from riscv_next_pc_atomic_sequence.
> (riscv_software_single_step): Adjust to use the renamed one.
>
> Signed-off-by: Yang Liu <liuyang22@iscas.ac.cn>
> ---
> gdb/riscv-tdep.c | 266 ++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 226 insertions(+), 40 deletions(-)
>
> diff --git a/gdb/riscv-tdep.c b/gdb/riscv-tdep.c
> index 3a2891c2c92..faff1aab420 100644
> --- a/gdb/riscv-tdep.c
> +++ b/gdb/riscv-tdep.c
> @@ -1578,8 +1578,34 @@ class riscv_insn
> BLTU,
> BGEU,
> /* These are needed for stepping over atomic sequences. */
> - LR,
> - SC,
> + SLTI,
> + SLTIU,
> + XORI,
> + ORI,
> + ANDI,
> + SLLI,
> + SLLIW,
> + SRLI,
> + SRLIW,
> + SRAI,
> + SRAIW,
> + SUB,
> + SUBW,
> + SLL,
> + SLLW,
> + SLT,
> + SLTU,
> + XOR,
> + SRL,
> + SRLW,
> + SRA,
> + SRAW,
> + OR,
> + AND,
> + LR_W,
> + LR_D,
> + SC_W,
> + SC_D,
> /* This instruction is used to do a syscall. */
> ECALL,
>
> @@ -1768,6 +1794,13 @@ class riscv_insn
> m_imm.s = EXTRACT_CBTYPE_IMM (ival);
> }
>
> + void decode_ca_type_insn (enum opcode opcode, ULONGEST ival)
> + {
> + m_opcode = opcode;
> + m_rs1 = decode_register_index_short (ival, OP_SH_CRS1S);
> + m_rs2 = decode_register_index_short (ival, OP_SH_CRS2S);
> + }
> +
> /* Fetch instruction from target memory at ADDR, return the content of
> the instruction, and update LEN with the instruction length. */
> static ULONGEST fetch_instruction (struct gdbarch *gdbarch,
> @@ -1882,14 +1915,62 @@ riscv_insn::decode (struct gdbarch *gdbarch, CORE_ADDR pc)
> decode_b_type_insn (BLTU, ival);
> else if (is_bgeu_insn (ival))
> decode_b_type_insn (BGEU, ival);
> + else if (is_slti_insn(ival))
> + decode_i_type_insn (SLTI, ival);
> + else if (is_sltiu_insn(ival))
> + decode_i_type_insn (SLTIU, ival);
> + else if (is_xori_insn(ival))
> + decode_i_type_insn (XORI, ival);
> + else if (is_ori_insn(ival))
> + decode_i_type_insn (ORI, ival);
> + else if (is_andi_insn(ival))
> + decode_i_type_insn (ANDI, ival);
> + else if (is_slli_insn(ival))
> + decode_i_type_insn (SLLI, ival);
> + else if (is_slliw_insn(ival))
> + decode_i_type_insn (SLLIW, ival);
> + else if (is_srli_insn(ival))
> + decode_i_type_insn (SRLI, ival);
> + else if (is_srliw_insn(ival))
> + decode_i_type_insn (SRLIW, ival);
> + else if (is_srai_insn(ival))
> + decode_i_type_insn (SRAI, ival);
> + else if (is_sraiw_insn(ival))
> + decode_i_type_insn (SRAIW, ival);
> + else if (is_sub_insn(ival))
> + decode_r_type_insn (SUB, ival);
> + else if (is_subw_insn(ival))
> + decode_r_type_insn (SUBW, ival);
> + else if (is_sll_insn(ival))
> + decode_r_type_insn (SLL, ival);
> + else if (is_sllw_insn(ival))
> + decode_r_type_insn (SLLW, ival);
> + else if (is_slt_insn(ival))
> + decode_r_type_insn (SLT, ival);
> + else if (is_sltu_insn(ival))
> + decode_r_type_insn (SLTU, ival);
> + else if (is_xor_insn(ival))
> + decode_r_type_insn (XOR, ival);
> + else if (is_srl_insn(ival))
> + decode_r_type_insn (SRL, ival);
> + else if (is_srlw_insn(ival))
> + decode_r_type_insn (SRLW, ival);
> + else if (is_sra_insn(ival))
> + decode_r_type_insn (SRA, ival);
> + else if (is_sraw_insn(ival))
> + decode_r_type_insn (SRAW, ival);
> + else if (is_or_insn(ival))
> + decode_r_type_insn (OR, ival);
> + else if (is_and_insn(ival))
> + decode_r_type_insn (AND, ival);
> else if (is_lr_w_insn (ival))
> - decode_r_type_insn (LR, ival);
> + decode_r_type_insn (LR_W, ival);
> else if (is_lr_d_insn (ival))
> - decode_r_type_insn (LR, ival);
> + decode_r_type_insn (LR_D, ival);
> else if (is_sc_w_insn (ival))
> - decode_r_type_insn (SC, ival);
> + decode_r_type_insn (SC_W, ival);
> else if (is_sc_d_insn (ival))
> - decode_r_type_insn (SC, ival);
> + decode_r_type_insn (SC_D, ival);
> else if (is_ecall_insn (ival))
> decode_i_type_insn (ECALL, ival);
> else if (is_ld_insn (ival))
> @@ -1944,6 +2025,24 @@ riscv_insn::decode (struct gdbarch *gdbarch, CORE_ADDR pc)
> m_rd = decode_register_index (ival, OP_SH_CRS1S);
> m_imm.s = EXTRACT_CITYPE_LUI_IMM (ival);
> }
> + else if (is_c_srli_insn (ival))
> + decode_cb_type_insn (SRLI, ival);
> + else if (is_c_srai_insn (ival))
> + decode_cb_type_insn (SRAI, ival);
> + else if (is_c_andi_insn (ival))
> + decode_cb_type_insn (ANDI, ival);
> + else if (is_c_sub_insn (ival))
> + decode_ca_type_insn (SUB, ival);
> + else if (is_c_xor_insn (ival))
> + decode_ca_type_insn (XOR, ival);
> + else if (is_c_or_insn (ival))
> + decode_ca_type_insn (OR, ival);
> + else if (is_c_and_insn (ival))
> + decode_ca_type_insn (AND, ival);
> + else if (is_c_subw_insn (ival))
> + decode_ca_type_insn (SUBW, ival);
> + else if (is_c_addw_insn (ival))
> + decode_ca_type_insn (ADDW, ival);
> else if (is_c_li_insn (ival))
> decode_ci_type_insn (LI, ival);
> /* C_SD and C_FSW have the same opcode. C_SD is RV64 and RV128 only,
> @@ -4404,51 +4503,138 @@ riscv_next_pc (struct regcache *regcache, CORE_ADDR pc)
> return next_pc;
> }
>
> +/* Return true if INSN is not a control transfer instruction and is allowed to
> + appear in the middle of the lr/sc sequence. */
> +
> +static bool
> +riscv_insn_is_non_cti_and_allowed_in_atomic_sequence(const struct riscv_insn &insn)
> +{
> + switch (insn.opcode ())
> + {
> + case riscv_insn::LUI:
> + case riscv_insn::AUIPC:
> + case riscv_insn::ADDI:
> + case riscv_insn::ADDIW:
> + case riscv_insn::SLTI:
> + case riscv_insn::SLTIU:
> + case riscv_insn::XORI:
> + case riscv_insn::ORI:
> + case riscv_insn::ANDI:
> + case riscv_insn::SLLI:
> + case riscv_insn::SLLIW:
> + case riscv_insn::SRLI:
> + case riscv_insn::SRLIW:
> + case riscv_insn::SRAI:
> + case riscv_insn::ADD:
> + case riscv_insn::ADDW:
> + case riscv_insn::SRAIW:
> + case riscv_insn::SUB:
> + case riscv_insn::SUBW:
> + case riscv_insn::SLL:
> + case riscv_insn::SLLW:
> + case riscv_insn::SLT:
> + case riscv_insn::SLTU:
> + case riscv_insn::XOR:
> + case riscv_insn::SRL:
> + case riscv_insn::SRLW:
> + case riscv_insn::SRA:
> + case riscv_insn::SRAW:
> + case riscv_insn::OR:
> + case riscv_insn::AND:
> + return true;
> + }
> +
> + return false;
> +}
> +
> +/* Return true if INSN is a direct branch insctruction. */
> +
> +static bool
> +riscv_insn_is_direct_branch(const struct riscv_insn &insn)
> +{
> + switch (insn.opcode ())
> + {
> + case riscv_insn::BEQ:
> + case riscv_insn::BNE:
> + case riscv_insn::BLT:
> + case riscv_insn::BGE:
> + case riscv_insn::BLTU:
> + case riscv_insn::BGEU:
> + case riscv_insn::JAL:
> + return true;
> + }
> +
> + return false;
> +}
> +
> /* We can't put a breakpoint in the middle of a lr/sc atomic sequence, so look
> for the end of the sequence and put the breakpoint there. */
>
> -static bool
> -riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR pc,
> - CORE_ADDR *next_pc)
> +static std::vector<CORE_ADDR>
> +riscv_deal_with_atomic_sequence (struct regcache *regcache, CORE_ADDR pc)
> {
> struct gdbarch *gdbarch = regcache->arch ();
> struct riscv_insn insn;
> - CORE_ADDR cur_step_pc = pc;
> - CORE_ADDR last_addr = 0;
> + CORE_ADDR cur_step_pc = pc, next_pc;
> + std::vector<CORE_ADDR> next_pcs;
> + const int atomic_sequence_length = 16;
> + bool found_valid_atomic_sequence = false;
> + enum riscv_insn::opcode lr_opcode;
>
> /* First instruction has to be a load reserved. */
> insn.decode (gdbarch, cur_step_pc);
> - if (insn.opcode () != riscv_insn::LR)
> - return false;
> - cur_step_pc = cur_step_pc + insn.length ();
> + lr_opcode = insn.opcode ();
> + if (lr_opcode != riscv_insn::LR_D && lr_opcode != riscv_insn::LR_W)
> + return {};
>
> - /* Next instruction should be branch to exit. */
> - insn.decode (gdbarch, cur_step_pc);
> - if (insn.opcode () != riscv_insn::BNE)
> - return false;
> - last_addr = cur_step_pc + insn.imm_signed ();
> - cur_step_pc = cur_step_pc + insn.length ();
> + /* A lr/sc sequence comprise at most 16 instructions placed sequentially in memory. */
> + for (int insn_count = 0; insn_count < atomic_sequence_length; ++insn_count)
> + {
> + cur_step_pc += insn.length ();
> + insn.decode (gdbarch, cur_step_pc);
>
> - /* Next instruction should be store conditional. */
> - insn.decode (gdbarch, cur_step_pc);
> - if (insn.opcode () != riscv_insn::SC)
> - return false;
> - cur_step_pc = cur_step_pc + insn.length ();
> + /* The dynamic code executed between lr/sc can only contain instructions
> + from the base I instruction set, excluding loads, stores, backward jumps,
> + taken backward branches, JALR, FENCE, FENCE.I, and SYSTEM instructions.
> + If the C extension is supported, then compressed forms of the aforementioned
> + I instructions are also permitted. */
> +
> + if (riscv_insn_is_non_cti_and_allowed_in_atomic_sequence (insn))
> + {
> + continue;
> + }
> + /* Look for a conditional branch instruction, check if it's taken forward or not. */
> + else if (riscv_insn_is_direct_branch (insn))
> + {
> + if (insn.imm_signed () > 0)
> + next_pc = cur_step_pc + insn.imm_signed ();
Oh I forgot to do a push_back here and my simple tests didn't catch
this. I'll send a V2 patch soon. Sorry.
Best regards,
Yang
> + else
> + break;
> + }
> + /* Look for a paired SC instruction which closes the atomic sequence. */
> + else if ((insn.opcode () == riscv_insn::SC_D && lr_opcode == riscv_insn::LR_D)
> + || (insn.opcode () == riscv_insn::SC_W && lr_opcode == riscv_insn::LR_W))
> + {
> + found_valid_atomic_sequence = true;
> + }
> + else
> + break;
> + }
> +
> + if (!found_valid_atomic_sequence)
> + return {};
>
> /* Next instruction should be branch to start. */
> insn.decode (gdbarch, cur_step_pc);
> if (insn.opcode () != riscv_insn::BNE)
> - return false;
> + return {};
> if (pc != (cur_step_pc + insn.imm_signed ()))
> - return false;
> - cur_step_pc = cur_step_pc + insn.length ();
> -
> - /* We should now be at the end of the sequence. */
> - if (cur_step_pc != last_addr)
> - return false;
> + return {};
> + cur_step_pc += insn.length ();
>
> - *next_pc = cur_step_pc;
> - return true;
> + next_pc = cur_step_pc;
> + next_pcs.push_back (next_pc);
> + return next_pcs;
> }
>
> /* This is called just before we want to resume the inferior, if we want to
> @@ -4458,14 +4644,14 @@ riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR pc,
> std::vector<CORE_ADDR>
> riscv_software_single_step (struct regcache *regcache)
> {
> - CORE_ADDR pc, next_pc;
> -
> - pc = regcache_read_pc (regcache);
> + CORE_ADDR cur_pc = regcache_read_pc (regcache), next_pc;
> + std::vector<CORE_ADDR> next_pcs
> + = riscv_deal_with_atomic_sequence (regcache, cur_pc);
>
> - if (riscv_next_pc_atomic_sequence (regcache, pc, &next_pc))
> - return {next_pc};
> + if (!next_pcs.empty ())
> + return next_pcs;
>
> - next_pc = riscv_next_pc (regcache, pc);
> + next_pc = riscv_next_pc (regcache, cur_pc);
>
> return {next_pc};
> }
On Thu, 02 Nov 2023 08:21:16 PDT (-0700), liuyang22@iscas.ac.cn wrote:
>
> On 2023/11/2 22:03, Yang Liu wrote:
>> Per RISC-V spec, the lr/sc sequence can consist of up to 16 instructions, and we
>> cannot insert breakpoints in the middle of this sequence. Before this, we only
>> detected a specific pattern (the most common one). This patch improves this part
>> and now supports more complex pattern detection.
Thanks. The general idea here seems pretty reasonable, but I haven't
looked that closely at the code. I'm super behind on Linux stuff right
now, so I might be slower than usual...
>> gdb/ChangeLog:
>>
>> * gdb/riscv-tdep.c (class riscv_insn): Add more needed opcode enums.
>> (riscv_insn::decode): Decode newly added opcodes.
>> (riscv_insn_is_non_cti_and_allowed_in_atomic_sequence): New.
>> (riscv_insn_is_direct_branch): New.
>> (riscv_next_pc_atomic_sequence): Removed.
>> (riscv_deal_with_atomic_sequence): Rename from riscv_next_pc_atomic_sequence.
>> (riscv_software_single_step): Adjust to use the renamed one.
>>
>> Signed-off-by: Yang Liu <liuyang22@iscas.ac.cn>
>> ---
>> gdb/riscv-tdep.c | 266 ++++++++++++++++++++++++++++++++++++++++-------
>> 1 file changed, 226 insertions(+), 40 deletions(-)
>>
>> diff --git a/gdb/riscv-tdep.c b/gdb/riscv-tdep.c
>> index 3a2891c2c92..faff1aab420 100644
>> --- a/gdb/riscv-tdep.c
>> +++ b/gdb/riscv-tdep.c
>> @@ -1578,8 +1578,34 @@ class riscv_insn
>> BLTU,
>> BGEU,
>> /* These are needed for stepping over atomic sequences. */
>> - LR,
>> - SC,
>> + SLTI,
>> + SLTIU,
>> + XORI,
>> + ORI,
>> + ANDI,
>> + SLLI,
>> + SLLIW,
>> + SRLI,
>> + SRLIW,
>> + SRAI,
>> + SRAIW,
>> + SUB,
>> + SUBW,
>> + SLL,
>> + SLLW,
>> + SLT,
>> + SLTU,
>> + XOR,
>> + SRL,
>> + SRLW,
>> + SRA,
>> + SRAW,
>> + OR,
>> + AND,
>> + LR_W,
>> + LR_D,
>> + SC_W,
>> + SC_D,
>> /* This instruction is used to do a syscall. */
>> ECALL,
>>
>> @@ -1768,6 +1794,13 @@ class riscv_insn
>> m_imm.s = EXTRACT_CBTYPE_IMM (ival);
>> }
>>
>> + void decode_ca_type_insn (enum opcode opcode, ULONGEST ival)
>> + {
>> + m_opcode = opcode;
>> + m_rs1 = decode_register_index_short (ival, OP_SH_CRS1S);
>> + m_rs2 = decode_register_index_short (ival, OP_SH_CRS2S);
>> + }
>> +
>> /* Fetch instruction from target memory at ADDR, return the content of
>> the instruction, and update LEN with the instruction length. */
>> static ULONGEST fetch_instruction (struct gdbarch *gdbarch,
>> @@ -1882,14 +1915,62 @@ riscv_insn::decode (struct gdbarch *gdbarch, CORE_ADDR pc)
>> decode_b_type_insn (BLTU, ival);
>> else if (is_bgeu_insn (ival))
>> decode_b_type_insn (BGEU, ival);
>> + else if (is_slti_insn(ival))
>> + decode_i_type_insn (SLTI, ival);
>> + else if (is_sltiu_insn(ival))
>> + decode_i_type_insn (SLTIU, ival);
>> + else if (is_xori_insn(ival))
>> + decode_i_type_insn (XORI, ival);
>> + else if (is_ori_insn(ival))
>> + decode_i_type_insn (ORI, ival);
>> + else if (is_andi_insn(ival))
>> + decode_i_type_insn (ANDI, ival);
>> + else if (is_slli_insn(ival))
>> + decode_i_type_insn (SLLI, ival);
>> + else if (is_slliw_insn(ival))
>> + decode_i_type_insn (SLLIW, ival);
>> + else if (is_srli_insn(ival))
>> + decode_i_type_insn (SRLI, ival);
>> + else if (is_srliw_insn(ival))
>> + decode_i_type_insn (SRLIW, ival);
>> + else if (is_srai_insn(ival))
>> + decode_i_type_insn (SRAI, ival);
>> + else if (is_sraiw_insn(ival))
>> + decode_i_type_insn (SRAIW, ival);
>> + else if (is_sub_insn(ival))
>> + decode_r_type_insn (SUB, ival);
>> + else if (is_subw_insn(ival))
>> + decode_r_type_insn (SUBW, ival);
>> + else if (is_sll_insn(ival))
>> + decode_r_type_insn (SLL, ival);
>> + else if (is_sllw_insn(ival))
>> + decode_r_type_insn (SLLW, ival);
>> + else if (is_slt_insn(ival))
>> + decode_r_type_insn (SLT, ival);
>> + else if (is_sltu_insn(ival))
>> + decode_r_type_insn (SLTU, ival);
>> + else if (is_xor_insn(ival))
>> + decode_r_type_insn (XOR, ival);
>> + else if (is_srl_insn(ival))
>> + decode_r_type_insn (SRL, ival);
>> + else if (is_srlw_insn(ival))
>> + decode_r_type_insn (SRLW, ival);
>> + else if (is_sra_insn(ival))
>> + decode_r_type_insn (SRA, ival);
>> + else if (is_sraw_insn(ival))
>> + decode_r_type_insn (SRAW, ival);
>> + else if (is_or_insn(ival))
>> + decode_r_type_insn (OR, ival);
>> + else if (is_and_insn(ival))
>> + decode_r_type_insn (AND, ival);
>> else if (is_lr_w_insn (ival))
>> - decode_r_type_insn (LR, ival);
>> + decode_r_type_insn (LR_W, ival);
>> else if (is_lr_d_insn (ival))
>> - decode_r_type_insn (LR, ival);
>> + decode_r_type_insn (LR_D, ival);
>> else if (is_sc_w_insn (ival))
>> - decode_r_type_insn (SC, ival);
>> + decode_r_type_insn (SC_W, ival);
>> else if (is_sc_d_insn (ival))
>> - decode_r_type_insn (SC, ival);
>> + decode_r_type_insn (SC_D, ival);
>> else if (is_ecall_insn (ival))
>> decode_i_type_insn (ECALL, ival);
>> else if (is_ld_insn (ival))
>> @@ -1944,6 +2025,24 @@ riscv_insn::decode (struct gdbarch *gdbarch, CORE_ADDR pc)
>> m_rd = decode_register_index (ival, OP_SH_CRS1S);
>> m_imm.s = EXTRACT_CITYPE_LUI_IMM (ival);
>> }
>> + else if (is_c_srli_insn (ival))
>> + decode_cb_type_insn (SRLI, ival);
>> + else if (is_c_srai_insn (ival))
>> + decode_cb_type_insn (SRAI, ival);
>> + else if (is_c_andi_insn (ival))
>> + decode_cb_type_insn (ANDI, ival);
>> + else if (is_c_sub_insn (ival))
>> + decode_ca_type_insn (SUB, ival);
>> + else if (is_c_xor_insn (ival))
>> + decode_ca_type_insn (XOR, ival);
>> + else if (is_c_or_insn (ival))
>> + decode_ca_type_insn (OR, ival);
>> + else if (is_c_and_insn (ival))
>> + decode_ca_type_insn (AND, ival);
>> + else if (is_c_subw_insn (ival))
>> + decode_ca_type_insn (SUBW, ival);
>> + else if (is_c_addw_insn (ival))
>> + decode_ca_type_insn (ADDW, ival);
>> else if (is_c_li_insn (ival))
>> decode_ci_type_insn (LI, ival);
>> /* C_SD and C_FSW have the same opcode. C_SD is RV64 and RV128 only,
>> @@ -4404,51 +4503,138 @@ riscv_next_pc (struct regcache *regcache, CORE_ADDR pc)
>> return next_pc;
>> }
>>
>> +/* Return true if INSN is not a control transfer instruction and is allowed to
>> + appear in the middle of the lr/sc sequence. */
>> +
>> +static bool
>> +riscv_insn_is_non_cti_and_allowed_in_atomic_sequence(const struct riscv_insn &insn)
>> +{
>> + switch (insn.opcode ())
>> + {
>> + case riscv_insn::LUI:
>> + case riscv_insn::AUIPC:
>> + case riscv_insn::ADDI:
>> + case riscv_insn::ADDIW:
>> + case riscv_insn::SLTI:
>> + case riscv_insn::SLTIU:
>> + case riscv_insn::XORI:
>> + case riscv_insn::ORI:
>> + case riscv_insn::ANDI:
>> + case riscv_insn::SLLI:
>> + case riscv_insn::SLLIW:
>> + case riscv_insn::SRLI:
>> + case riscv_insn::SRLIW:
>> + case riscv_insn::SRAI:
>> + case riscv_insn::ADD:
>> + case riscv_insn::ADDW:
>> + case riscv_insn::SRAIW:
>> + case riscv_insn::SUB:
>> + case riscv_insn::SUBW:
>> + case riscv_insn::SLL:
>> + case riscv_insn::SLLW:
>> + case riscv_insn::SLT:
>> + case riscv_insn::SLTU:
>> + case riscv_insn::XOR:
>> + case riscv_insn::SRL:
>> + case riscv_insn::SRLW:
>> + case riscv_insn::SRA:
>> + case riscv_insn::SRAW:
>> + case riscv_insn::OR:
>> + case riscv_insn::AND:
>> + return true;
>> + }
>> +
>> + return false;
>> +}
>> +
>> +/* Return true if INSN is a direct branch insctruction. */
>> +
>> +static bool
>> +riscv_insn_is_direct_branch(const struct riscv_insn &insn)
>> +{
>> + switch (insn.opcode ())
>> + {
>> + case riscv_insn::BEQ:
>> + case riscv_insn::BNE:
>> + case riscv_insn::BLT:
>> + case riscv_insn::BGE:
>> + case riscv_insn::BLTU:
>> + case riscv_insn::BGEU:
>> + case riscv_insn::JAL:
>> + return true;
>> + }
>> +
>> + return false;
>> +}
>> +
>> /* We can't put a breakpoint in the middle of a lr/sc atomic sequence, so look
>> for the end of the sequence and put the breakpoint there. */
>>
>> -static bool
>> -riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR pc,
>> - CORE_ADDR *next_pc)
>> +static std::vector<CORE_ADDR>
>> +riscv_deal_with_atomic_sequence (struct regcache *regcache, CORE_ADDR pc)
>> {
>> struct gdbarch *gdbarch = regcache->arch ();
>> struct riscv_insn insn;
>> - CORE_ADDR cur_step_pc = pc;
>> - CORE_ADDR last_addr = 0;
>> + CORE_ADDR cur_step_pc = pc, next_pc;
>> + std::vector<CORE_ADDR> next_pcs;
>> + const int atomic_sequence_length = 16;
>> + bool found_valid_atomic_sequence = false;
>> + enum riscv_insn::opcode lr_opcode;
>>
>> /* First instruction has to be a load reserved. */
>> insn.decode (gdbarch, cur_step_pc);
>> - if (insn.opcode () != riscv_insn::LR)
>> - return false;
>> - cur_step_pc = cur_step_pc + insn.length ();
>> + lr_opcode = insn.opcode ();
>> + if (lr_opcode != riscv_insn::LR_D && lr_opcode != riscv_insn::LR_W)
>> + return {};
>>
>> - /* Next instruction should be branch to exit. */
>> - insn.decode (gdbarch, cur_step_pc);
>> - if (insn.opcode () != riscv_insn::BNE)
>> - return false;
>> - last_addr = cur_step_pc + insn.imm_signed ();
>> - cur_step_pc = cur_step_pc + insn.length ();
>> + /* A lr/sc sequence comprise at most 16 instructions placed sequentially in memory. */
>> + for (int insn_count = 0; insn_count < atomic_sequence_length; ++insn_count)
>> + {
>> + cur_step_pc += insn.length ();
>> + insn.decode (gdbarch, cur_step_pc);
>>
>> - /* Next instruction should be store conditional. */
>> - insn.decode (gdbarch, cur_step_pc);
>> - if (insn.opcode () != riscv_insn::SC)
>> - return false;
>> - cur_step_pc = cur_step_pc + insn.length ();
>> + /* The dynamic code executed between lr/sc can only contain instructions
>> + from the base I instruction set, excluding loads, stores, backward jumps,
>> + taken backward branches, JALR, FENCE, FENCE.I, and SYSTEM instructions.
>> + If the C extension is supported, then compressed forms of the aforementioned
>> + I instructions are also permitted. */
That ends up with some odd interactions with the Zc extensions. I
opened https://github.com/riscv/riscv-isa-manual/issues/1157 to check.
>> +
>> + if (riscv_insn_is_non_cti_and_allowed_in_atomic_sequence (insn))
>> + {
>> + continue;
>> + }
>> + /* Look for a conditional branch instruction, check if it's taken forward or not. */
>> + else if (riscv_insn_is_direct_branch (insn))
>> + {
>> + if (insn.imm_signed () > 0)
>> + next_pc = cur_step_pc + insn.imm_signed ();
>
> Oh I forgot to do a push_back here and my simple tests didn't catch
> this. I'll send a V2 patch soon. Sorry.
I also think the branch detection logic here isn't quite right: the
LR/SC sequence only becomes unconstrained if the backwards branch is
taken at runtime. I can't find any examples of people doing that, but
one could imagine some sort of spin-type patterns with an early retry
that do this sort of thing.
The same logic applies to disallowed instructions, but I don't think
it's super likely those end up conditionally executed in LR/SC sequences
so maybe we can ignore that?
> Best regards,
>
> Yang
>
>> + else
>> + break;
>> + }
>> + /* Look for a paired SC instruction which closes the atomic sequence. */
>> + else if ((insn.opcode () == riscv_insn::SC_D && lr_opcode == riscv_insn::LR_D)
>> + || (insn.opcode () == riscv_insn::SC_W && lr_opcode == riscv_insn::LR_W))
>> + {
>> + found_valid_atomic_sequence = true;
>> + }
>> + else
>> + break;
>> + }
>> +
>> + if (!found_valid_atomic_sequence)
>> + return {};
>>
>> /* Next instruction should be branch to start. */
>> insn.decode (gdbarch, cur_step_pc);
>> if (insn.opcode () != riscv_insn::BNE)
>> - return false;
>> + return {};
>> if (pc != (cur_step_pc + insn.imm_signed ()))
>> - return false;
>> - cur_step_pc = cur_step_pc + insn.length ();
>> -
>> - /* We should now be at the end of the sequence. */
>> - if (cur_step_pc != last_addr)
>> - return false;
>> + return {};
>> + cur_step_pc += insn.length ();
>>
>> - *next_pc = cur_step_pc;
>> - return true;
>> + next_pc = cur_step_pc;
>> + next_pcs.push_back (next_pc);
>> + return next_pcs;
>> }
>>
>> /* This is called just before we want to resume the inferior, if we want to
>> @@ -4458,14 +4644,14 @@ riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR pc,
>> std::vector<CORE_ADDR>
>> riscv_software_single_step (struct regcache *regcache)
>> {
>> - CORE_ADDR pc, next_pc;
>> -
>> - pc = regcache_read_pc (regcache);
>> + CORE_ADDR cur_pc = regcache_read_pc (regcache), next_pc;
>> + std::vector<CORE_ADDR> next_pcs
>> + = riscv_deal_with_atomic_sequence (regcache, cur_pc);
>>
>> - if (riscv_next_pc_atomic_sequence (regcache, pc, &next_pc))
>> - return {next_pc};
>> + if (!next_pcs.empty ())
>> + return next_pcs;
>>
>> - next_pc = riscv_next_pc (regcache, pc);
>> + next_pc = riscv_next_pc (regcache, cur_pc);
>>
>> return {next_pc};
>> }
On 2023/11/2 23:58, Palmer Dabbelt wrote:
> On Thu, 02 Nov 2023 08:21:16 PDT (-0700), liuyang22@iscas.ac.cn wrote:
>>
>> On 2023/11/2 22:03, Yang Liu wrote:
>>> Per RISC-V spec, the lr/sc sequence can consist of up to 16
>>> instructions, and we
>>> cannot insert breakpoints in the middle of this sequence. Before
>>> this, we only
>>> detected a specific pattern (the most common one). This patch
>>> improves this part
>>> and now supports more complex pattern detection.
>
> Thanks. The general idea here seems pretty reasonable, but I haven't
> looked that closely at the code. I'm super behind on Linux stuff
> right now, so I might be slower that usual...
>
No rush, thank you for your time!
>>> gdb/ChangeLog:
>>>
>>> * gdb/riscv-tdep.c (class riscv_insn): Add more needed
>>> opcode enums.
>>> (riscv_insn::decode): Decode newly added opcodes.
>>> (riscv_insn_is_non_cti_and_allowed_in_atomic_sequence): New.
>>> (riscv_insn_is_direct_branch): New.
>>> (riscv_next_pc_atomic_sequence): Removed.
>>> (riscv_deal_with_atomic_sequence): Rename from
>>> riscv_next_pc_atomic_sequence.
>>> (riscv_software_single_step): Adjust to use the renamed one.
>>>
>>> Signed-off-by: Yang Liu <liuyang22@iscas.ac.cn>
>>> ---
>>> gdb/riscv-tdep.c | 266
>>> ++++++++++++++++++++++++++++++++++++++++-------
>>> 1 file changed, 226 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/gdb/riscv-tdep.c b/gdb/riscv-tdep.c
>>> index 3a2891c2c92..faff1aab420 100644
>>> --- a/gdb/riscv-tdep.c
>>> +++ b/gdb/riscv-tdep.c
>>> @@ -1578,8 +1578,34 @@ class riscv_insn
>>> BLTU,
>>> BGEU,
>>> /* These are needed for stepping over atomic sequences. */
>>> - LR,
>>> - SC,
>>> + SLTI,
>>> + SLTIU,
>>> + XORI,
>>> + ORI,
>>> + ANDI,
>>> + SLLI,
>>> + SLLIW,
>>> + SRLI,
>>> + SRLIW,
>>> + SRAI,
>>> + SRAIW,
>>> + SUB,
>>> + SUBW,
>>> + SLL,
>>> + SLLW,
>>> + SLT,
>>> + SLTU,
>>> + XOR,
>>> + SRL,
>>> + SRLW,
>>> + SRA,
>>> + SRAW,
>>> + OR,
>>> + AND,
>>> + LR_W,
>>> + LR_D,
>>> + SC_W,
>>> + SC_D,
>>> /* This instruction is used to do a syscall. */
>>> ECALL,
>>>
>>> @@ -1768,6 +1794,13 @@ class riscv_insn
>>> m_imm.s = EXTRACT_CBTYPE_IMM (ival);
>>> }
>>>
>>> + void decode_ca_type_insn (enum opcode opcode, ULONGEST ival)
>>> + {
>>> + m_opcode = opcode;
>>> + m_rs1 = decode_register_index_short (ival, OP_SH_CRS1S);
>>> + m_rs2 = decode_register_index_short (ival, OP_SH_CRS2S);
>>> + }
>>> +
>>> /* Fetch instruction from target memory at ADDR, return the
>>> content of
>>> the instruction, and update LEN with the instruction length. */
>>> static ULONGEST fetch_instruction (struct gdbarch *gdbarch,
>>> @@ -1882,14 +1915,62 @@ riscv_insn::decode (struct gdbarch *gdbarch,
>>> CORE_ADDR pc)
>>> decode_b_type_insn (BLTU, ival);
>>> else if (is_bgeu_insn (ival))
>>> decode_b_type_insn (BGEU, ival);
>>> + else if (is_slti_insn(ival))
>>> + decode_i_type_insn (SLTI, ival);
>>> + else if (is_sltiu_insn(ival))
>>> + decode_i_type_insn (SLTIU, ival);
>>> + else if (is_xori_insn(ival))
>>> + decode_i_type_insn (XORI, ival);
>>> + else if (is_ori_insn(ival))
>>> + decode_i_type_insn (ORI, ival);
>>> + else if (is_andi_insn(ival))
>>> + decode_i_type_insn (ANDI, ival);
>>> + else if (is_slli_insn(ival))
>>> + decode_i_type_insn (SLLI, ival);
>>> + else if (is_slliw_insn(ival))
>>> + decode_i_type_insn (SLLIW, ival);
>>> + else if (is_srli_insn(ival))
>>> + decode_i_type_insn (SRLI, ival);
>>> + else if (is_srliw_insn(ival))
>>> + decode_i_type_insn (SRLIW, ival);
>>> + else if (is_srai_insn(ival))
>>> + decode_i_type_insn (SRAI, ival);
>>> + else if (is_sraiw_insn(ival))
>>> + decode_i_type_insn (SRAIW, ival);
>>> + else if (is_sub_insn(ival))
>>> + decode_r_type_insn (SUB, ival);
>>> + else if (is_subw_insn(ival))
>>> + decode_r_type_insn (SUBW, ival);
>>> + else if (is_sll_insn(ival))
>>> + decode_r_type_insn (SLL, ival);
>>> + else if (is_sllw_insn(ival))
>>> + decode_r_type_insn (SLLW, ival);
>>> + else if (is_slt_insn(ival))
>>> + decode_r_type_insn (SLT, ival);
>>> + else if (is_sltu_insn(ival))
>>> + decode_r_type_insn (SLTU, ival);
>>> + else if (is_xor_insn(ival))
>>> + decode_r_type_insn (XOR, ival);
>>> + else if (is_srl_insn(ival))
>>> + decode_r_type_insn (SRL, ival);
>>> + else if (is_srlw_insn(ival))
>>> + decode_r_type_insn (SRLW, ival);
>>> + else if (is_sra_insn(ival))
>>> + decode_r_type_insn (SRA, ival);
>>> + else if (is_sraw_insn(ival))
>>> + decode_r_type_insn (SRAW, ival);
>>> + else if (is_or_insn(ival))
>>> + decode_r_type_insn (OR, ival);
>>> + else if (is_and_insn(ival))
>>> + decode_r_type_insn (AND, ival);
>>> else if (is_lr_w_insn (ival))
>>> - decode_r_type_insn (LR, ival);
>>> + decode_r_type_insn (LR_W, ival);
>>> else if (is_lr_d_insn (ival))
>>> - decode_r_type_insn (LR, ival);
>>> + decode_r_type_insn (LR_D, ival);
>>> else if (is_sc_w_insn (ival))
>>> - decode_r_type_insn (SC, ival);
>>> + decode_r_type_insn (SC_W, ival);
>>> else if (is_sc_d_insn (ival))
>>> - decode_r_type_insn (SC, ival);
>>> + decode_r_type_insn (SC_D, ival);
>>> else if (is_ecall_insn (ival))
>>> decode_i_type_insn (ECALL, ival);
>>> else if (is_ld_insn (ival))
>>> @@ -1944,6 +2025,24 @@ riscv_insn::decode (struct gdbarch *gdbarch,
>>> CORE_ADDR pc)
>>> m_rd = decode_register_index (ival, OP_SH_CRS1S);
>>> m_imm.s = EXTRACT_CITYPE_LUI_IMM (ival);
>>> }
>>> + else if (is_c_srli_insn (ival))
>>> + decode_cb_type_insn (SRLI, ival);
>>> + else if (is_c_srai_insn (ival))
>>> + decode_cb_type_insn (SRAI, ival);
>>> + else if (is_c_andi_insn (ival))
>>> + decode_cb_type_insn (ANDI, ival);
>>> + else if (is_c_sub_insn (ival))
>>> + decode_ca_type_insn (SUB, ival);
>>> + else if (is_c_xor_insn (ival))
>>> + decode_ca_type_insn (XOR, ival);
>>> + else if (is_c_or_insn (ival))
>>> + decode_ca_type_insn (OR, ival);
>>> + else if (is_c_and_insn (ival))
>>> + decode_ca_type_insn (AND, ival);
>>> + else if (is_c_subw_insn (ival))
>>> + decode_ca_type_insn (SUBW, ival);
>>> + else if (is_c_addw_insn (ival))
>>> + decode_ca_type_insn (ADDW, ival);
>>> else if (is_c_li_insn (ival))
>>> decode_ci_type_insn (LI, ival);
>>> /* C_SD and C_FSW have the same opcode. C_SD is RV64 and
>>> RV128 only,
>>> @@ -4404,51 +4503,138 @@ riscv_next_pc (struct regcache *regcache,
>>> CORE_ADDR pc)
>>> return next_pc;
>>> }
>>>
>>> +/* Return true if INSN is not a control transfer instruction and is
>>> allowed to
>>> + appear in the middle of the lr/sc sequence. */
>>> +
>>> +static bool
>>> +riscv_insn_is_non_cti_and_allowed_in_atomic_sequence(const struct
>>> riscv_insn &insn)
>>> +{
>>> + switch (insn.opcode ())
>>> + {
>>> + case riscv_insn::LUI:
>>> + case riscv_insn::AUIPC:
>>> + case riscv_insn::ADDI:
>>> + case riscv_insn::ADDIW:
>>> + case riscv_insn::SLTI:
>>> + case riscv_insn::SLTIU:
>>> + case riscv_insn::XORI:
>>> + case riscv_insn::ORI:
>>> + case riscv_insn::ANDI:
>>> + case riscv_insn::SLLI:
>>> + case riscv_insn::SLLIW:
>>> + case riscv_insn::SRLI:
>>> + case riscv_insn::SRLIW:
>>> + case riscv_insn::SRAI:
>>> + case riscv_insn::ADD:
>>> + case riscv_insn::ADDW:
>>> + case riscv_insn::SRAIW:
>>> + case riscv_insn::SUB:
>>> + case riscv_insn::SUBW:
>>> + case riscv_insn::SLL:
>>> + case riscv_insn::SLLW:
>>> + case riscv_insn::SLT:
>>> + case riscv_insn::SLTU:
>>> + case riscv_insn::XOR:
>>> + case riscv_insn::SRL:
>>> + case riscv_insn::SRLW:
>>> + case riscv_insn::SRA:
>>> + case riscv_insn::SRAW:
>>> + case riscv_insn::OR:
>>> + case riscv_insn::AND:
>>> + return true;
>>> + }
>>> +
>>> + return false;
>>> +}
>>> +
>>> +/* Return true if INSN is a direct branch instruction. */
>>> +
>>> +static bool
>>> +riscv_insn_is_direct_branch(const struct riscv_insn &insn)
>>> +{
>>> + switch (insn.opcode ())
>>> + {
>>> + case riscv_insn::BEQ:
>>> + case riscv_insn::BNE:
>>> + case riscv_insn::BLT:
>>> + case riscv_insn::BGE:
>>> + case riscv_insn::BLTU:
>>> + case riscv_insn::BGEU:
>>> + case riscv_insn::JAL:
>>> + return true;
>>> + }
>>> +
>>> + return false;
>>> +}
>>> +
>>> /* We can't put a breakpoint in the middle of a lr/sc atomic
>>> sequence, so look
>>> for the end of the sequence and put the breakpoint there. */
>>>
>>> -static bool
>>> -riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR
>>> pc,
>>> - CORE_ADDR *next_pc)
>>> +static std::vector<CORE_ADDR>
>>> +riscv_deal_with_atomic_sequence (struct regcache *regcache,
>>> CORE_ADDR pc)
>>> {
>>> struct gdbarch *gdbarch = regcache->arch ();
>>> struct riscv_insn insn;
>>> - CORE_ADDR cur_step_pc = pc;
>>> - CORE_ADDR last_addr = 0;
>>> + CORE_ADDR cur_step_pc = pc, next_pc;
>>> + std::vector<CORE_ADDR> next_pcs;
>>> + const int atomic_sequence_length = 16;
>>> + bool found_valid_atomic_sequence = false;
>>> + enum riscv_insn::opcode lr_opcode;
>>>
>>> /* First instruction has to be a load reserved. */
>>> insn.decode (gdbarch, cur_step_pc);
>>> - if (insn.opcode () != riscv_insn::LR)
>>> - return false;
>>> - cur_step_pc = cur_step_pc + insn.length ();
>>> + lr_opcode = insn.opcode ();
>>> + if (lr_opcode != riscv_insn::LR_D && lr_opcode != riscv_insn::LR_W)
>>> + return {};
>>>
>>> - /* Next instruction should be branch to exit. */
>>> - insn.decode (gdbarch, cur_step_pc);
>>> - if (insn.opcode () != riscv_insn::BNE)
>>> - return false;
>>> - last_addr = cur_step_pc + insn.imm_signed ();
>>> - cur_step_pc = cur_step_pc + insn.length ();
>>> + /* An lr/sc sequence comprises at most 16 instructions placed
>>> sequentially in memory. */
>>> + for (int insn_count = 0; insn_count < atomic_sequence_length;
>>> ++insn_count)
>>> + {
>>> + cur_step_pc += insn.length ();
>>> + insn.decode (gdbarch, cur_step_pc);
>>>
>>> - /* Next instruction should be store conditional. */
>>> - insn.decode (gdbarch, cur_step_pc);
>>> - if (insn.opcode () != riscv_insn::SC)
>>> - return false;
>>> - cur_step_pc = cur_step_pc + insn.length ();
>>> + /* The dynamic code executed between lr/sc can only contain
>>> instructions
>>> + from the base I instruction set, excluding loads, stores,
>>> backward jumps,
>>> + taken backward branches, JALR, FENCE, FENCE.I, and SYSTEM
>>> instructions.
>>> + If the C extension is supported, then compressed forms of the
>>> aforementioned
>>> + I instructions are also permitted. */
>
> That ends up with some odd interactions with the Zc extensions. I
> opened https://github.com/riscv/riscv-isa-manual/issues/1157 to check.
>
Okay, let's track this concern there.
>>> +
>>> + if (riscv_insn_is_non_cti_and_allowed_in_atomic_sequence (insn))
>>> + {
>>> + continue;
>>> + }
>>> + /* Look for a conditional branch instruction, check if it's
>>> taken forward or not. */
>>> + else if (riscv_insn_is_direct_branch (insn))
>>> + {
>>> + if (insn.imm_signed () > 0)
>>> + next_pc = cur_step_pc + insn.imm_signed ();
>>
>> Oh I forgot to do a push_back here and my simple tests didn't catch
>> this. I'll send a V2 patch soon. Sorry.
>
V2 posted.
> I also think the branch detection logic here isn't quite right: the
> LR/SC sequence only becomes unconstrained if the backwards branch is
> taken at runtime. I can't find any examples of people doing that, but
> one could imagine some sort of spin-type patterns with an early retry
> that do this sort of thing.
>
> The same logic applies to disallowed instructions, but I don't think
> it's super likely those end up conditionally executed in LR/SC
> sequences so maybe we can ignore that?
>
Yeah, it seems difficult to get the lr/sc sequence completely correct,
so can we implement a version that is correct in most cases (as this
patch does)?
>> Best regards,
>>
>> Yang
>>
>>> + else
>>> + break;
>>> + }
>>> + /* Look for a paired SC instruction which closes the atomic
>>> sequence. */
>>> + else if ((insn.opcode () == riscv_insn::SC_D && lr_opcode ==
>>> riscv_insn::LR_D)
>>> + || (insn.opcode () == riscv_insn::SC_W && lr_opcode ==
>>> riscv_insn::LR_W))
>>> + {
>>> + found_valid_atomic_sequence = true;
>>> + }
>>> + else
>>> + break;
>>> + }
>>> +
>>> + if (!found_valid_atomic_sequence)
>>> + return {};
>>>
>>> /* Next instruction should be branch to start. */
>>> insn.decode (gdbarch, cur_step_pc);
>>> if (insn.opcode () != riscv_insn::BNE)
>>> - return false;
>>> + return {};
>>> if (pc != (cur_step_pc + insn.imm_signed ()))
>>> - return false;
>>> - cur_step_pc = cur_step_pc + insn.length ();
>>> -
>>> - /* We should now be at the end of the sequence. */
>>> - if (cur_step_pc != last_addr)
>>> - return false;
>>> + return {};
>>> + cur_step_pc += insn.length ();
>>>
>>> - *next_pc = cur_step_pc;
>>> - return true;
>>> + next_pc = cur_step_pc;
>>> + next_pcs.push_back (next_pc);
>>> + return next_pcs;
>>> }
>>>
>>> /* This is called just before we want to resume the inferior, if
>>> we want to
>>> @@ -4458,14 +4644,14 @@ riscv_next_pc_atomic_sequence (struct
>>> regcache *regcache, CORE_ADDR pc,
>>> std::vector<CORE_ADDR>
>>> riscv_software_single_step (struct regcache *regcache)
>>> {
>>> - CORE_ADDR pc, next_pc;
>>> -
>>> - pc = regcache_read_pc (regcache);
>>> + CORE_ADDR cur_pc = regcache_read_pc (regcache), next_pc;
>>> + std::vector<CORE_ADDR> next_pcs
>>> + = riscv_deal_with_atomic_sequence (regcache, cur_pc);
>>>
>>> - if (riscv_next_pc_atomic_sequence (regcache, pc, &next_pc))
>>> - return {next_pc};
>>> + if (!next_pcs.empty ())
>>> + return next_pcs;
>>>
>>> - next_pc = riscv_next_pc (regcache, pc);
>>> + next_pc = riscv_next_pc (regcache, cur_pc);
>>>
>>> return {next_pc};
>>> }
@@ -1578,8 +1578,34 @@ class riscv_insn
BLTU,
BGEU,
/* These are needed for stepping over atomic sequences. */
- LR,
- SC,
+ SLTI,
+ SLTIU,
+ XORI,
+ ORI,
+ ANDI,
+ SLLI,
+ SLLIW,
+ SRLI,
+ SRLIW,
+ SRAI,
+ SRAIW,
+ SUB,
+ SUBW,
+ SLL,
+ SLLW,
+ SLT,
+ SLTU,
+ XOR,
+ SRL,
+ SRLW,
+ SRA,
+ SRAW,
+ OR,
+ AND,
+ LR_W,
+ LR_D,
+ SC_W,
+ SC_D,
/* This instruction is used to do a syscall. */
ECALL,
@@ -1768,6 +1794,13 @@ class riscv_insn
m_imm.s = EXTRACT_CBTYPE_IMM (ival);
}
+ void decode_ca_type_insn (enum opcode opcode, ULONGEST ival)
+ {
+ m_opcode = opcode;
+ m_rs1 = decode_register_index_short (ival, OP_SH_CRS1S);
+ m_rs2 = decode_register_index_short (ival, OP_SH_CRS2S);
+ }
+
/* Fetch instruction from target memory at ADDR, return the content of
the instruction, and update LEN with the instruction length. */
static ULONGEST fetch_instruction (struct gdbarch *gdbarch,
@@ -1882,14 +1915,62 @@ riscv_insn::decode (struct gdbarch *gdbarch, CORE_ADDR pc)
decode_b_type_insn (BLTU, ival);
else if (is_bgeu_insn (ival))
decode_b_type_insn (BGEU, ival);
+ else if (is_slti_insn(ival))
+ decode_i_type_insn (SLTI, ival);
+ else if (is_sltiu_insn(ival))
+ decode_i_type_insn (SLTIU, ival);
+ else if (is_xori_insn(ival))
+ decode_i_type_insn (XORI, ival);
+ else if (is_ori_insn(ival))
+ decode_i_type_insn (ORI, ival);
+ else if (is_andi_insn(ival))
+ decode_i_type_insn (ANDI, ival);
+ else if (is_slli_insn(ival))
+ decode_i_type_insn (SLLI, ival);
+ else if (is_slliw_insn(ival))
+ decode_i_type_insn (SLLIW, ival);
+ else if (is_srli_insn(ival))
+ decode_i_type_insn (SRLI, ival);
+ else if (is_srliw_insn(ival))
+ decode_i_type_insn (SRLIW, ival);
+ else if (is_srai_insn(ival))
+ decode_i_type_insn (SRAI, ival);
+ else if (is_sraiw_insn(ival))
+ decode_i_type_insn (SRAIW, ival);
+ else if (is_sub_insn(ival))
+ decode_r_type_insn (SUB, ival);
+ else if (is_subw_insn(ival))
+ decode_r_type_insn (SUBW, ival);
+ else if (is_sll_insn(ival))
+ decode_r_type_insn (SLL, ival);
+ else if (is_sllw_insn(ival))
+ decode_r_type_insn (SLLW, ival);
+ else if (is_slt_insn(ival))
+ decode_r_type_insn (SLT, ival);
+ else if (is_sltu_insn(ival))
+ decode_r_type_insn (SLTU, ival);
+ else if (is_xor_insn(ival))
+ decode_r_type_insn (XOR, ival);
+ else if (is_srl_insn(ival))
+ decode_r_type_insn (SRL, ival);
+ else if (is_srlw_insn(ival))
+ decode_r_type_insn (SRLW, ival);
+ else if (is_sra_insn(ival))
+ decode_r_type_insn (SRA, ival);
+ else if (is_sraw_insn(ival))
+ decode_r_type_insn (SRAW, ival);
+ else if (is_or_insn(ival))
+ decode_r_type_insn (OR, ival);
+ else if (is_and_insn(ival))
+ decode_r_type_insn (AND, ival);
else if (is_lr_w_insn (ival))
- decode_r_type_insn (LR, ival);
+ decode_r_type_insn (LR_W, ival);
else if (is_lr_d_insn (ival))
- decode_r_type_insn (LR, ival);
+ decode_r_type_insn (LR_D, ival);
else if (is_sc_w_insn (ival))
- decode_r_type_insn (SC, ival);
+ decode_r_type_insn (SC_W, ival);
else if (is_sc_d_insn (ival))
- decode_r_type_insn (SC, ival);
+ decode_r_type_insn (SC_D, ival);
else if (is_ecall_insn (ival))
decode_i_type_insn (ECALL, ival);
else if (is_ld_insn (ival))
@@ -1944,6 +2025,24 @@ riscv_insn::decode (struct gdbarch *gdbarch, CORE_ADDR pc)
m_rd = decode_register_index (ival, OP_SH_CRS1S);
m_imm.s = EXTRACT_CITYPE_LUI_IMM (ival);
}
+ else if (is_c_srli_insn (ival))
+ decode_cb_type_insn (SRLI, ival);
+ else if (is_c_srai_insn (ival))
+ decode_cb_type_insn (SRAI, ival);
+ else if (is_c_andi_insn (ival))
+ decode_cb_type_insn (ANDI, ival);
+ else if (is_c_sub_insn (ival))
+ decode_ca_type_insn (SUB, ival);
+ else if (is_c_xor_insn (ival))
+ decode_ca_type_insn (XOR, ival);
+ else if (is_c_or_insn (ival))
+ decode_ca_type_insn (OR, ival);
+ else if (is_c_and_insn (ival))
+ decode_ca_type_insn (AND, ival);
+ else if (is_c_subw_insn (ival))
+ decode_ca_type_insn (SUBW, ival);
+ else if (is_c_addw_insn (ival))
+ decode_ca_type_insn (ADDW, ival);
else if (is_c_li_insn (ival))
decode_ci_type_insn (LI, ival);
/* C_SD and C_FSW have the same opcode. C_SD is RV64 and RV128 only,
@@ -4404,51 +4503,138 @@ riscv_next_pc (struct regcache *regcache, CORE_ADDR pc)
return next_pc;
}
+/* Return true if INSN is not a control transfer instruction and is allowed to
+ appear in the middle of the lr/sc sequence. */
+
+static bool
+riscv_insn_is_non_cti_and_allowed_in_atomic_sequence(const struct riscv_insn &insn)
+{
+ switch (insn.opcode ())
+ {
+ case riscv_insn::LUI:
+ case riscv_insn::AUIPC:
+ case riscv_insn::ADDI:
+ case riscv_insn::ADDIW:
+ case riscv_insn::SLTI:
+ case riscv_insn::SLTIU:
+ case riscv_insn::XORI:
+ case riscv_insn::ORI:
+ case riscv_insn::ANDI:
+ case riscv_insn::SLLI:
+ case riscv_insn::SLLIW:
+ case riscv_insn::SRLI:
+ case riscv_insn::SRLIW:
+ case riscv_insn::SRAI:
+ case riscv_insn::ADD:
+ case riscv_insn::ADDW:
+ case riscv_insn::SRAIW:
+ case riscv_insn::SUB:
+ case riscv_insn::SUBW:
+ case riscv_insn::SLL:
+ case riscv_insn::SLLW:
+ case riscv_insn::SLT:
+ case riscv_insn::SLTU:
+ case riscv_insn::XOR:
+ case riscv_insn::SRL:
+ case riscv_insn::SRLW:
+ case riscv_insn::SRA:
+ case riscv_insn::SRAW:
+ case riscv_insn::OR:
+ case riscv_insn::AND:
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if INSN is a direct branch instruction. */
+
+static bool
+riscv_insn_is_direct_branch(const struct riscv_insn &insn)
+{
+ switch (insn.opcode ())
+ {
+ case riscv_insn::BEQ:
+ case riscv_insn::BNE:
+ case riscv_insn::BLT:
+ case riscv_insn::BGE:
+ case riscv_insn::BLTU:
+ case riscv_insn::BGEU:
+ case riscv_insn::JAL:
+ return true;
+ }
+
+ return false;
+}
+
/* We can't put a breakpoint in the middle of a lr/sc atomic sequence, so look
for the end of the sequence and put the breakpoint there. */
-static bool
-riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR pc,
- CORE_ADDR *next_pc)
+static std::vector<CORE_ADDR>
+riscv_deal_with_atomic_sequence (struct regcache *regcache, CORE_ADDR pc)
{
struct gdbarch *gdbarch = regcache->arch ();
struct riscv_insn insn;
- CORE_ADDR cur_step_pc = pc;
- CORE_ADDR last_addr = 0;
+ CORE_ADDR cur_step_pc = pc, next_pc;
+ std::vector<CORE_ADDR> next_pcs;
+ const int atomic_sequence_length = 16;
+ bool found_valid_atomic_sequence = false;
+ enum riscv_insn::opcode lr_opcode;
/* First instruction has to be a load reserved. */
insn.decode (gdbarch, cur_step_pc);
- if (insn.opcode () != riscv_insn::LR)
- return false;
- cur_step_pc = cur_step_pc + insn.length ();
+ lr_opcode = insn.opcode ();
+ if (lr_opcode != riscv_insn::LR_D && lr_opcode != riscv_insn::LR_W)
+ return {};
- /* Next instruction should be branch to exit. */
- insn.decode (gdbarch, cur_step_pc);
- if (insn.opcode () != riscv_insn::BNE)
- return false;
- last_addr = cur_step_pc + insn.imm_signed ();
- cur_step_pc = cur_step_pc + insn.length ();
+ /* An lr/sc sequence comprises at most 16 instructions placed sequentially in memory. */
+ for (int insn_count = 0; insn_count < atomic_sequence_length; ++insn_count)
+ {
+ cur_step_pc += insn.length ();
+ insn.decode (gdbarch, cur_step_pc);
- /* Next instruction should be store conditional. */
- insn.decode (gdbarch, cur_step_pc);
- if (insn.opcode () != riscv_insn::SC)
- return false;
- cur_step_pc = cur_step_pc + insn.length ();
+ /* The dynamic code executed between lr/sc can only contain instructions
+ from the base I instruction set, excluding loads, stores, backward jumps,
+ taken backward branches, JALR, FENCE, FENCE.I, and SYSTEM instructions.
+ If the C extension is supported, then compressed forms of the aforementioned
+ I instructions are also permitted. */
+
+ if (riscv_insn_is_non_cti_and_allowed_in_atomic_sequence (insn))
+ {
+ continue;
+ }
+ /* Look for a conditional branch instruction, check if it's taken forward or not. */
+ else if (riscv_insn_is_direct_branch (insn))
+ {
+ if (insn.imm_signed () > 0)
+ next_pc = cur_step_pc + insn.imm_signed ();
+ else
+ break;
+ }
+ /* Look for a paired SC instruction which closes the atomic sequence. */
+ else if ((insn.opcode () == riscv_insn::SC_D && lr_opcode == riscv_insn::LR_D)
+ || (insn.opcode () == riscv_insn::SC_W && lr_opcode == riscv_insn::LR_W))
+ {
+ found_valid_atomic_sequence = true;
+ }
+ else
+ break;
+ }
+
+ if (!found_valid_atomic_sequence)
+ return {};
/* Next instruction should be branch to start. */
insn.decode (gdbarch, cur_step_pc);
if (insn.opcode () != riscv_insn::BNE)
- return false;
+ return {};
if (pc != (cur_step_pc + insn.imm_signed ()))
- return false;
- cur_step_pc = cur_step_pc + insn.length ();
-
- /* We should now be at the end of the sequence. */
- if (cur_step_pc != last_addr)
- return false;
+ return {};
+ cur_step_pc += insn.length ();
- *next_pc = cur_step_pc;
- return true;
+ next_pc = cur_step_pc;
+ next_pcs.push_back (next_pc);
+ return next_pcs;
}
/* This is called just before we want to resume the inferior, if we want to
@@ -4458,14 +4644,14 @@ riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR pc,
std::vector<CORE_ADDR>
riscv_software_single_step (struct regcache *regcache)
{
- CORE_ADDR pc, next_pc;
-
- pc = regcache_read_pc (regcache);
+ CORE_ADDR cur_pc = regcache_read_pc (regcache), next_pc;
+ std::vector<CORE_ADDR> next_pcs
+ = riscv_deal_with_atomic_sequence (regcache, cur_pc);
- if (riscv_next_pc_atomic_sequence (regcache, pc, &next_pc))
- return {next_pc};
+ if (!next_pcs.empty ())
+ return next_pcs;
- next_pc = riscv_next_pc (regcache, pc);
+ next_pc = riscv_next_pc (regcache, cur_pc);
return {next_pc};
}