sim: riscv: Fix some issues with class-a instructions

Message ID AS8P193MB1285BC49239C6CAFB43FC57CE4122@AS8P193MB1285.EURP193.PROD.OUTLOOK.COM
State New
Headers
Series sim: riscv: Fix some issues with class-a instructions |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed

Commit Message

Bernd Edlinger April 22, 2024, 9:45 a.m. UTC
  This fixes some issues with atomic instruction handling.  First the
instructions may have AQ and/or RL bits set, but the emulator has no
such concept, so we have to ignore those.

According to the spec the memory must be naturally aligned, otherwise
an exception shall be thrown, so do the sim_core read/write aligned.
In the case of riscv64 target, there were the LR_D and SC_D
64bit load and store instructions missing, so add those.

Also the AMOMIN/AMOMAX[U]_W instructions were not correct for riscv64
because the upper half word of the input registers were not ignored
as they should, so use explicit type-casts to uint32_t and int32_t
for those.

And finally make the class-a instruction set only executable if a
riscv cpu model with A extension is selected.
---
 sim/riscv/sim-main.c | 72 ++++++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 19 deletions(-)
  

Comments

Andrew Burgess April 22, 2024, 3:01 p.m. UTC | #1
Bernd Edlinger <bernd.edlinger@hotmail.de> writes:

> This fixes some issues with atomic instruction handling.  First the
> instructions may have AQ and/or RL bits set, but the emulator has no
> such concept, so we have to ignore those.
>
> According to the spec the memory must be naturally aligned, otherwise
> an exception shall be thrown, so do the sim_core read/write aligned.
> In the case of riscv64 target, there were the LR_D and SC_D
> 64bit load and store instructions missing, so add those.
>
> Also the AMOMIN/AMOMAX[U]_W instructions were not correct for riscv64
> because the upper half word of the input registers were not ignored
> as they should, so use explicit type-casts to uint32_t and int32_t
> for those.
>
> And finally make the class-a instruction set only executable if a
> riscv cpu model with A extension is selected.
> ---
>  sim/riscv/sim-main.c | 72 ++++++++++++++++++++++++++++++++------------
>  1 file changed, 53 insertions(+), 19 deletions(-)
>
> diff --git a/sim/riscv/sim-main.c b/sim/riscv/sim-main.c
> index e4b15b533ba..be38baa2f93 100644
> --- a/sim/riscv/sim-main.c
> +++ b/sim/riscv/sim-main.c
> @@ -841,6 +841,9 @@ execute_m (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>  static sim_cia
>  execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>  {
> +  unsigned_word mask_aq = OP_MASK_AQ << OP_SH_AQ;
> +  unsigned_word mask_rl = OP_MASK_RL << OP_SH_RL;
> +  unsigned_word mask_aqrl = mask_aq | mask_rl;
>    struct riscv_sim_cpu *riscv_cpu = RISCV_SIM_CPU (cpu);
>    SIM_DESC sd = CPU_STATE (cpu);
>    struct riscv_sim_state *state = RISCV_SIM_STATE (sd);
> @@ -855,13 +858,18 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>    sim_cia pc = riscv_cpu->pc + 4;
>  
>    /* Handle these two load/store operations specifically.  */
> -  switch (op->match)
> +  switch (op->match & ~mask_aqrl)

Given the aq/rl bits are not handled, maybe we should be issuing a 1
time warning if we encounter an instruction with these bits set?  Or
maybe it's not relevant for the simulator?  I think more detail about
what the thinking is here would help: Is this safe to ignore, or is this
something we're putting off for the future?

>      {
> +    case MATCH_LR_D:
>      case MATCH_LR_W:
>        TRACE_INSN (cpu, "%s %s, (%s);", op->name, rd_name, rs1_name);
> -      store_rd (cpu, rd,
> -	sim_core_read_unaligned_4 (cpu, riscv_cpu->pc, read_map,
> -				   riscv_cpu->regs[rs1]));
> +      if (op->xlen_requirement == 64)
> +	tmp = sim_core_read_aligned_8 (cpu, riscv_cpu->pc, read_map,
> +				       riscv_cpu->regs[rs1]);
> +      else
> +	tmp = EXTEND32 (sim_core_read_aligned_4 (cpu, riscv_cpu->pc, read_map,
> +						 riscv_cpu->regs[rs1]));

I might be completely off the mark here, but I'm unsure about this
change from using 'unaligned' to 'aligned' here.

From my reading of common/sim-n-core.h, I believe the aligned/unaligned
refers to the callers knowledge of the address.  So calling
sim_core_read_aligned_* means that the common code should assume that
the address is correctly aligned.  While sim_core_read_unaligned_*
handles the case that the address might not be aligned, and raises the
exception.

Please remember: I might be completely wrong here, and you just need to
explain to me what's going on better.



> +      store_rd (cpu, rd, tmp);
>  
>        /* Walk the reservation list to find an existing match.  */
>        amo_curr = state->amo_reserved_list;
> @@ -878,6 +886,7 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>        amo_curr->next = state->amo_reserved_list;
>        state->amo_reserved_list = amo_curr;
>        goto done;
> +    case MATCH_SC_D:
>      case MATCH_SC_W:
>        TRACE_INSN (cpu, "%s %s, %s, (%s);", op->name, rd_name, rs2_name,
>  		  rs1_name);
> @@ -889,7 +898,12 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>  	  if (amo_curr->addr == riscv_cpu->regs[rs1])
>  	    {
>  	      /* We found a reservation, so operate it.  */
> -	      sim_core_write_unaligned_4 (cpu, riscv_cpu->pc, write_map,
> +	      if (op->xlen_requirement == 64)
> +		sim_core_write_aligned_8 (cpu, riscv_cpu->pc, write_map,
> +					  riscv_cpu->regs[rs1],
> +					  riscv_cpu->regs[rs2]);
> +	      else
> +		sim_core_write_aligned_4 (cpu, riscv_cpu->pc, write_map,
>  					  riscv_cpu->regs[rs1],
>  					  riscv_cpu->regs[rs2]);
>  	      store_rd (cpu, rd, 0);
> @@ -913,14 +927,14 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>    TRACE_INSN (cpu, "%s %s, %s, (%s);",
>  	      op->name, rd_name, rs2_name, rs1_name);
>    if (op->xlen_requirement == 64)
> -    tmp = sim_core_read_unaligned_8 (cpu, riscv_cpu->pc, read_map,
> -				     riscv_cpu->regs[rs1]);
> +    tmp = sim_core_read_aligned_8 (cpu, riscv_cpu->pc, read_map,
> +				   riscv_cpu->regs[rs1]);
>    else
> -    tmp = EXTEND32 (sim_core_read_unaligned_4 (cpu, riscv_cpu->pc, read_map,
> -					       riscv_cpu->regs[rs1]));
> +    tmp = EXTEND32 (sim_core_read_aligned_4 (cpu, riscv_cpu->pc, read_map,
> +					     riscv_cpu->regs[rs1]));
>    store_rd (cpu, rd, tmp);
>  
> -  switch (op->match)
> +  switch (op->match & ~mask_aqrl)
>      {
>      case MATCH_AMOADD_D:
>      case MATCH_AMOADD_W:
> @@ -931,25 +945,37 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>        tmp = riscv_cpu->regs[rd] & riscv_cpu->regs[rs2];
>        break;
>      case MATCH_AMOMAX_D:
> -    case MATCH_AMOMAX_W:
>        tmp = max ((signed_word) riscv_cpu->regs[rd],
>  		 (signed_word) riscv_cpu->regs[rs2]);
>        break;
> +    case MATCH_AMOMAX_W:
> +      tmp = max ((int32_t) riscv_cpu->regs[rd],
> +		 (int32_t) riscv_cpu->regs[rs2]);

I wonder if we should use EXTEND32 here?  There seems to be a mix of
styles throughout the file.

> +      break;
>      case MATCH_AMOMAXU_D:
> -    case MATCH_AMOMAXU_W:
>        tmp = max ((unsigned_word) riscv_cpu->regs[rd],
>  		 (unsigned_word) riscv_cpu->regs[rs2]);
>        break;
> +    case MATCH_AMOMAXU_W:
> +      tmp = max ((uint32_t) riscv_cpu->regs[rd],
> +		 (uint32_t) riscv_cpu->regs[rs2]);

And maybe EXTEND32 again here, but I guess we'd still need the cast to
unsigned_word.

> +      break;
>      case MATCH_AMOMIN_D:
> -    case MATCH_AMOMIN_W:
>        tmp = min ((signed_word) riscv_cpu->regs[rd],
>  		 (signed_word) riscv_cpu->regs[rs2]);
>        break;
> +    case MATCH_AMOMIN_W:
> +      tmp = min ((int32_t) riscv_cpu->regs[rd],
> +		 (int32_t) riscv_cpu->regs[rs2]);

Same question as above.

> +      break;
>      case MATCH_AMOMINU_D:
> -    case MATCH_AMOMINU_W:
>        tmp = min ((unsigned_word) riscv_cpu->regs[rd],
>  		 (unsigned_word) riscv_cpu->regs[rs2]);
>        break;
> +    case MATCH_AMOMINU_W:
> +      tmp = min ((uint32_t) riscv_cpu->regs[rd],
> +		 (uint32_t) riscv_cpu->regs[rs2]);

Same question as above.

> +      break;
>      case MATCH_AMOOR_D:
>      case MATCH_AMOOR_W:
>        tmp = riscv_cpu->regs[rd] | riscv_cpu->regs[rs2];
> @@ -968,11 +994,11 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>      }
>  
>    if (op->xlen_requirement == 64)
> -    sim_core_write_unaligned_8 (cpu, riscv_cpu->pc, write_map,
> -				riscv_cpu->regs[rs1], tmp);
> +    sim_core_write_aligned_8 (cpu, riscv_cpu->pc, write_map,
> +			      riscv_cpu->regs[rs1], tmp);
>    else
> -    sim_core_write_unaligned_4 (cpu, riscv_cpu->pc, write_map,
> -				riscv_cpu->regs[rs1], tmp);
> +    sim_core_write_aligned_4 (cpu, riscv_cpu->pc, write_map,
> +			      riscv_cpu->regs[rs1], tmp);
>  
>   done:
>    return pc;
> @@ -1307,7 +1333,15 @@ execute_one (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>    switch (op->insn_class)
>      {
>      case INSN_CLASS_A:
> -      return execute_a (cpu, iw, op);
> +      /* Check whether model with A extension is selected.  */
> +      if (riscv_cpu->csr.misa & 1)
> +	return execute_a (cpu, iw, op);
> +      else
> +	{
> +	  TRACE_INSN (cpu, "UNHANDLED EXTENSION: %d", op->insn_class);
> +	  sim_engine_halt (sd, cpu, NULL, riscv_cpu->pc, sim_signalled,
> +			   SIM_SIGILL);
> +	}

Thanks,
Andrew

>      case INSN_CLASS_C:
>        /* Check whether model with C extension is selected.  */
>        if (riscv_cpu->csr.misa & 4)
> -- 
> 2.39.2
  
Bernd Edlinger April 22, 2024, 4:25 p.m. UTC | #2
On 4/22/24 17:01, Andrew Burgess wrote:
> Bernd Edlinger <bernd.edlinger@hotmail.de> writes:
> 
>> This fixes some issues with atomic instruction handling.  First the
>> instructions may have AQ and/or RL bits set, but the emulator has no
>> such concept, so we have to ignore those.
>>
>> According to the spec the memory must be naturally aligned, otherwise
>> an exception shall be thrown, so do the sim_core read/write aligned.
>> In the case of riscv64 target, there were the LR_D and SC_D
>> 64bit load and store instructions missing, so add those.
>>
>> Also the AMOMIN/AMOMAX[U]_W instructions were not correct for riscv64
>> because the upper half word of the input registers were not ignored
>> as they should, so use explicit type-casts to uint32_t and int32_t
>> for those.
>>
>> And finally make the class-a instruction set only executable if a
>> riscv cpu model with A extension is selected.
>> ---
>>  sim/riscv/sim-main.c | 72 ++++++++++++++++++++++++++++++++------------
>>  1 file changed, 53 insertions(+), 19 deletions(-)
>>
>> diff --git a/sim/riscv/sim-main.c b/sim/riscv/sim-main.c
>> index e4b15b533ba..be38baa2f93 100644
>> --- a/sim/riscv/sim-main.c
>> +++ b/sim/riscv/sim-main.c
>> @@ -841,6 +841,9 @@ execute_m (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>  static sim_cia
>>  execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>  {
>> +  unsigned_word mask_aq = OP_MASK_AQ << OP_SH_AQ;
>> +  unsigned_word mask_rl = OP_MASK_RL << OP_SH_RL;
>> +  unsigned_word mask_aqrl = mask_aq | mask_rl;
>>    struct riscv_sim_cpu *riscv_cpu = RISCV_SIM_CPU (cpu);
>>    SIM_DESC sd = CPU_STATE (cpu);
>>    struct riscv_sim_state *state = RISCV_SIM_STATE (sd);
>> @@ -855,13 +858,18 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>    sim_cia pc = riscv_cpu->pc + 4;
>>  
>>    /* Handle these two load/store operations specifically.  */
>> -  switch (op->match)
>> +  switch (op->match & ~mask_aqrl)
> 
> Given the aq/rl bits are not handled, maybe we should be issuing a 1
> time warning if we encounter an instruction with these bits set?  Or
> maybe it's not relevant for the simulator?  I think more detail about
> what the thinking is here would help: Is this safe to ignore, or is this
> something we're putting off for the future?
> 

No, these bits mean "acquire" and "release" memory ordering, they are only
noise for a software simulator engine, they would have a meaning when the
memory accesses are executing in parallel to the instruction pipeline.

The issue is, that they are very commonly generated whenever a C++ try/catch
construct is used, and of course also when atomic builtin functions are used.


Thanks
Bernd.

>>      {
>> +    case MATCH_LR_D:
>>      case MATCH_LR_W:
>>        TRACE_INSN (cpu, "%s %s, (%s);", op->name, rd_name, rs1_name);
>> -      store_rd (cpu, rd,
>> -	sim_core_read_unaligned_4 (cpu, riscv_cpu->pc, read_map,
>> -				   riscv_cpu->regs[rs1]));
>> +      if (op->xlen_requirement == 64)
>> +	tmp = sim_core_read_aligned_8 (cpu, riscv_cpu->pc, read_map,
>> +				       riscv_cpu->regs[rs1]);
>> +      else
>> +	tmp = EXTEND32 (sim_core_read_aligned_4 (cpu, riscv_cpu->pc, read_map,
>> +						 riscv_cpu->regs[rs1]));
> 
> I might be completely off the mark here, but I'm unsure about this
> change from using 'unaligned' to 'aligned' here.
> 
> From my reading of common/sim-n-core.h, I believe the aligned/unaligned
> refers to the callers knowledge of the address.  So calling
> sim_core_read_aligned_* means that the common code should assume that
> the address is correctly aligned.  While sim_core_read_unaligned_*
> handles the case that the address might not be aligned, and raises the
> exception.
> 
> Please remember: I might be completely wrong here, and you just need to
> explain to me what's going on better.
> 
> 
> 
>> +      store_rd (cpu, rd, tmp);
>>  
>>        /* Walk the reservation list to find an existing match.  */
>>        amo_curr = state->amo_reserved_list;
>> @@ -878,6 +886,7 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>        amo_curr->next = state->amo_reserved_list;
>>        state->amo_reserved_list = amo_curr;
>>        goto done;
>> +    case MATCH_SC_D:
>>      case MATCH_SC_W:
>>        TRACE_INSN (cpu, "%s %s, %s, (%s);", op->name, rd_name, rs2_name,
>>  		  rs1_name);
>> @@ -889,7 +898,12 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>  	  if (amo_curr->addr == riscv_cpu->regs[rs1])
>>  	    {
>>  	      /* We found a reservation, so operate it.  */
>> -	      sim_core_write_unaligned_4 (cpu, riscv_cpu->pc, write_map,
>> +	      if (op->xlen_requirement == 64)
>> +		sim_core_write_aligned_8 (cpu, riscv_cpu->pc, write_map,
>> +					  riscv_cpu->regs[rs1],
>> +					  riscv_cpu->regs[rs2]);
>> +	      else
>> +		sim_core_write_aligned_4 (cpu, riscv_cpu->pc, write_map,
>>  					  riscv_cpu->regs[rs1],
>>  					  riscv_cpu->regs[rs2]);
>>  	      store_rd (cpu, rd, 0);
>> @@ -913,14 +927,14 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>    TRACE_INSN (cpu, "%s %s, %s, (%s);",
>>  	      op->name, rd_name, rs2_name, rs1_name);
>>    if (op->xlen_requirement == 64)
>> -    tmp = sim_core_read_unaligned_8 (cpu, riscv_cpu->pc, read_map,
>> -				     riscv_cpu->regs[rs1]);
>> +    tmp = sim_core_read_aligned_8 (cpu, riscv_cpu->pc, read_map,
>> +				   riscv_cpu->regs[rs1]);
>>    else
>> -    tmp = EXTEND32 (sim_core_read_unaligned_4 (cpu, riscv_cpu->pc, read_map,
>> -					       riscv_cpu->regs[rs1]));
>> +    tmp = EXTEND32 (sim_core_read_aligned_4 (cpu, riscv_cpu->pc, read_map,
>> +					     riscv_cpu->regs[rs1]));
>>    store_rd (cpu, rd, tmp);
>>  
>> -  switch (op->match)
>> +  switch (op->match & ~mask_aqrl)
>>      {
>>      case MATCH_AMOADD_D:
>>      case MATCH_AMOADD_W:
>> @@ -931,25 +945,37 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>        tmp = riscv_cpu->regs[rd] & riscv_cpu->regs[rs2];
>>        break;
>>      case MATCH_AMOMAX_D:
>> -    case MATCH_AMOMAX_W:
>>        tmp = max ((signed_word) riscv_cpu->regs[rd],
>>  		 (signed_word) riscv_cpu->regs[rs2]);
>>        break;
>> +    case MATCH_AMOMAX_W:
>> +      tmp = max ((int32_t) riscv_cpu->regs[rd],
>> +		 (int32_t) riscv_cpu->regs[rs2]);
> 
> I wonder if we should use EXTEND32 here?  There seems to be a mix of
> styles throughout the file.
> 
>> +      break;
>>      case MATCH_AMOMAXU_D:
>> -    case MATCH_AMOMAXU_W:
>>        tmp = max ((unsigned_word) riscv_cpu->regs[rd],
>>  		 (unsigned_word) riscv_cpu->regs[rs2]);
>>        break;
>> +    case MATCH_AMOMAXU_W:
>> +      tmp = max ((uint32_t) riscv_cpu->regs[rd],
>> +		 (uint32_t) riscv_cpu->regs[rs2]);
> 
> And maybe EXTEND32 again here, but I guess we'd still need the cast to
> unsigned_word.
> 
>> +      break;
>>      case MATCH_AMOMIN_D:
>> -    case MATCH_AMOMIN_W:
>>        tmp = min ((signed_word) riscv_cpu->regs[rd],
>>  		 (signed_word) riscv_cpu->regs[rs2]);
>>        break;
>> +    case MATCH_AMOMIN_W:
>> +      tmp = min ((int32_t) riscv_cpu->regs[rd],
>> +		 (int32_t) riscv_cpu->regs[rs2]);
> 
> Same question as above.
> 
>> +      break;
>>      case MATCH_AMOMINU_D:
>> -    case MATCH_AMOMINU_W:
>>        tmp = min ((unsigned_word) riscv_cpu->regs[rd],
>>  		 (unsigned_word) riscv_cpu->regs[rs2]);
>>        break;
>> +    case MATCH_AMOMINU_W:
>> +      tmp = min ((uint32_t) riscv_cpu->regs[rd],
>> +		 (uint32_t) riscv_cpu->regs[rs2]);
> 
> Same question as above.
> 
>> +      break;
>>      case MATCH_AMOOR_D:
>>      case MATCH_AMOOR_W:
>>        tmp = riscv_cpu->regs[rd] | riscv_cpu->regs[rs2];
>> @@ -968,11 +994,11 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>      }
>>  
>>    if (op->xlen_requirement == 64)
>> -    sim_core_write_unaligned_8 (cpu, riscv_cpu->pc, write_map,
>> -				riscv_cpu->regs[rs1], tmp);
>> +    sim_core_write_aligned_8 (cpu, riscv_cpu->pc, write_map,
>> +			      riscv_cpu->regs[rs1], tmp);
>>    else
>> -    sim_core_write_unaligned_4 (cpu, riscv_cpu->pc, write_map,
>> -				riscv_cpu->regs[rs1], tmp);
>> +    sim_core_write_aligned_4 (cpu, riscv_cpu->pc, write_map,
>> +			      riscv_cpu->regs[rs1], tmp);
>>  
>>   done:
>>    return pc;
>> @@ -1307,7 +1333,15 @@ execute_one (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>    switch (op->insn_class)
>>      {
>>      case INSN_CLASS_A:
>> -      return execute_a (cpu, iw, op);
>> +      /* Check whether model with A extension is selected.  */
>> +      if (riscv_cpu->csr.misa & 1)
>> +	return execute_a (cpu, iw, op);
>> +      else
>> +	{
>> +	  TRACE_INSN (cpu, "UNHANDLED EXTENSION: %d", op->insn_class);
>> +	  sim_engine_halt (sd, cpu, NULL, riscv_cpu->pc, sim_signalled,
>> +			   SIM_SIGILL);
>> +	}
> 
> Thanks,
> Andrew
> 
>>      case INSN_CLASS_C:
>>        /* Check whether model with C extension is selected.  */
>>        if (riscv_cpu->csr.misa & 4)
>> -- 
>> 2.39.2
>
  
Bernd Edlinger April 23, 2024, 1:56 p.m. UTC | #3
On 4/22/24 17:01, Andrew Burgess wrote:
> Bernd Edlinger <bernd.edlinger@hotmail.de> writes:
> 
>>      {
>> +    case MATCH_LR_D:
>>      case MATCH_LR_W:
>>        TRACE_INSN (cpu, "%s %s, (%s);", op->name, rd_name, rs1_name);
>> -      store_rd (cpu, rd,
>> -	sim_core_read_unaligned_4 (cpu, riscv_cpu->pc, read_map,
>> -				   riscv_cpu->regs[rs1]));
>> +      if (op->xlen_requirement == 64)
>> +	tmp = sim_core_read_aligned_8 (cpu, riscv_cpu->pc, read_map,
>> +				       riscv_cpu->regs[rs1]);
>> +      else
>> +	tmp = EXTEND32 (sim_core_read_aligned_4 (cpu, riscv_cpu->pc, read_map,
>> +						 riscv_cpu->regs[rs1]));
> 
> I might be completely off the mark here, but I'm unsure about this
> change from using 'unaligned' to 'aligned' here.
> 
> From my reading of common/sim-n-core.h, I believe the aligned/unaligned
> refers to the callers knowledge of the address.  So calling
> sim_core_read_aligned_* means that the common code should assume that
> the address is correctly aligned.  While sim_core_read_unaligned_*
> handles the case that the address might not be aligned, and raises the
> exception.
> 
> Please remember: I might be completely wrong here, and you just need to
> explain to me what's going on better.
> 
> 

Ah, you are right, the sim_core_read_aligned does hit an assertion:

sim-core.c:432: assertion failed - (addr & (nr_bytes - 1)) == 0
Aborted

That is of course not what I wanted. I think I should better tempoarily
change the value of "current_alignment".

> 
>> +      store_rd (cpu, rd, tmp);
>>  
>>        /* Walk the reservation list to find an existing match.  */
>>        amo_curr = state->amo_reserved_list;
>> @@ -878,6 +886,7 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>        amo_curr->next = state->amo_reserved_list;
>>        state->amo_reserved_list = amo_curr;
>>        goto done;
>> +    case MATCH_SC_D:
>>      case MATCH_SC_W:
>>        TRACE_INSN (cpu, "%s %s, %s, (%s);", op->name, rd_name, rs2_name,
>>  		  rs1_name);
>> @@ -889,7 +898,12 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>  	  if (amo_curr->addr == riscv_cpu->regs[rs1])
>>  	    {
>>  	      /* We found a reservation, so operate it.  */
>> -	      sim_core_write_unaligned_4 (cpu, riscv_cpu->pc, write_map,
>> +	      if (op->xlen_requirement == 64)
>> +		sim_core_write_aligned_8 (cpu, riscv_cpu->pc, write_map,
>> +					  riscv_cpu->regs[rs1],
>> +					  riscv_cpu->regs[rs2]);
>> +	      else
>> +		sim_core_write_aligned_4 (cpu, riscv_cpu->pc, write_map,
>>  					  riscv_cpu->regs[rs1],
>>  					  riscv_cpu->regs[rs2]);
>>  	      store_rd (cpu, rd, 0);
>> @@ -913,14 +927,14 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>    TRACE_INSN (cpu, "%s %s, %s, (%s);",
>>  	      op->name, rd_name, rs2_name, rs1_name);
>>    if (op->xlen_requirement == 64)
>> -    tmp = sim_core_read_unaligned_8 (cpu, riscv_cpu->pc, read_map,
>> -				     riscv_cpu->regs[rs1]);
>> +    tmp = sim_core_read_aligned_8 (cpu, riscv_cpu->pc, read_map,
>> +				   riscv_cpu->regs[rs1]);
>>    else
>> -    tmp = EXTEND32 (sim_core_read_unaligned_4 (cpu, riscv_cpu->pc, read_map,
>> -					       riscv_cpu->regs[rs1]));
>> +    tmp = EXTEND32 (sim_core_read_aligned_4 (cpu, riscv_cpu->pc, read_map,
>> +					     riscv_cpu->regs[rs1]));
>>    store_rd (cpu, rd, tmp);
>>  
>> -  switch (op->match)
>> +  switch (op->match & ~mask_aqrl)
>>      {
>>      case MATCH_AMOADD_D:
>>      case MATCH_AMOADD_W:
>> @@ -931,25 +945,37 @@ execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
>>        tmp = riscv_cpu->regs[rd] & riscv_cpu->regs[rs2];
>>        break;
>>      case MATCH_AMOMAX_D:
>> -    case MATCH_AMOMAX_W:
>>        tmp = max ((signed_word) riscv_cpu->regs[rd],
>>  		 (signed_word) riscv_cpu->regs[rs2]);
>>        break;
>> +    case MATCH_AMOMAX_W:
>> +      tmp = max ((int32_t) riscv_cpu->regs[rd],
>> +		 (int32_t) riscv_cpu->regs[rs2]);
> 
> I wonder if we should use EXTEND32 here?  There seems to be a mix of
> styles throughout the file.
> 

No that is not necessary, because the value is just used to write the
result back to memory using sim_core_write_unaligned_4/8 and that should
ignore the high word.  But I will better try that out before I send
the next version of this patch.
Other places pass the value to store_rd so the high word will be visible in
an riscv64 architecture register.

>> +      break;
>>      case MATCH_AMOMAXU_D:
>> -    case MATCH_AMOMAXU_W:
>>        tmp = max ((unsigned_word) riscv_cpu->regs[rd],
>>  		 (unsigned_word) riscv_cpu->regs[rs2]);
>>        break;
>> +    case MATCH_AMOMAXU_W:
>> +      tmp = max ((uint32_t) riscv_cpu->regs[rd],
>> +		 (uint32_t) riscv_cpu->regs[rs2]);
> 
> And maybe EXTEND32 again here, but I guess we'd still need the cast to
> unsigned_word.
> 

Same here, some kind of sign extension happens, but the high word should be
ignored and there is no ugly compiler warning either.

But I can try that out if it behaves correctly...


Thanks
Bernd.
  

Patch

diff --git a/sim/riscv/sim-main.c b/sim/riscv/sim-main.c
index e4b15b533ba..be38baa2f93 100644
--- a/sim/riscv/sim-main.c
+++ b/sim/riscv/sim-main.c
@@ -841,6 +841,9 @@  execute_m (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
 static sim_cia
 execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
 {
+  unsigned_word mask_aq = OP_MASK_AQ << OP_SH_AQ;
+  unsigned_word mask_rl = OP_MASK_RL << OP_SH_RL;
+  unsigned_word mask_aqrl = mask_aq | mask_rl;
   struct riscv_sim_cpu *riscv_cpu = RISCV_SIM_CPU (cpu);
   SIM_DESC sd = CPU_STATE (cpu);
   struct riscv_sim_state *state = RISCV_SIM_STATE (sd);
@@ -855,13 +858,18 @@  execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
   sim_cia pc = riscv_cpu->pc + 4;
 
   /* Handle these two load/store operations specifically.  */
-  switch (op->match)
+  switch (op->match & ~mask_aqrl)
     {
+    case MATCH_LR_D:
     case MATCH_LR_W:
       TRACE_INSN (cpu, "%s %s, (%s);", op->name, rd_name, rs1_name);
-      store_rd (cpu, rd,
-	sim_core_read_unaligned_4 (cpu, riscv_cpu->pc, read_map,
-				   riscv_cpu->regs[rs1]));
+      if (op->xlen_requirement == 64)
+	tmp = sim_core_read_aligned_8 (cpu, riscv_cpu->pc, read_map,
+				       riscv_cpu->regs[rs1]);
+      else
+	tmp = EXTEND32 (sim_core_read_aligned_4 (cpu, riscv_cpu->pc, read_map,
+						 riscv_cpu->regs[rs1]));
+      store_rd (cpu, rd, tmp);
 
       /* Walk the reservation list to find an existing match.  */
       amo_curr = state->amo_reserved_list;
@@ -878,6 +886,7 @@  execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
       amo_curr->next = state->amo_reserved_list;
       state->amo_reserved_list = amo_curr;
       goto done;
+    case MATCH_SC_D:
     case MATCH_SC_W:
       TRACE_INSN (cpu, "%s %s, %s, (%s);", op->name, rd_name, rs2_name,
 		  rs1_name);
@@ -889,7 +898,12 @@  execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
 	  if (amo_curr->addr == riscv_cpu->regs[rs1])
 	    {
 	      /* We found a reservation, so operate it.  */
-	      sim_core_write_unaligned_4 (cpu, riscv_cpu->pc, write_map,
+	      if (op->xlen_requirement == 64)
+		sim_core_write_aligned_8 (cpu, riscv_cpu->pc, write_map,
+					  riscv_cpu->regs[rs1],
+					  riscv_cpu->regs[rs2]);
+	      else
+		sim_core_write_aligned_4 (cpu, riscv_cpu->pc, write_map,
 					  riscv_cpu->regs[rs1],
 					  riscv_cpu->regs[rs2]);
 	      store_rd (cpu, rd, 0);
@@ -913,14 +927,14 @@  execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
   TRACE_INSN (cpu, "%s %s, %s, (%s);",
 	      op->name, rd_name, rs2_name, rs1_name);
   if (op->xlen_requirement == 64)
-    tmp = sim_core_read_unaligned_8 (cpu, riscv_cpu->pc, read_map,
-				     riscv_cpu->regs[rs1]);
+    tmp = sim_core_read_aligned_8 (cpu, riscv_cpu->pc, read_map,
+				   riscv_cpu->regs[rs1]);
   else
-    tmp = EXTEND32 (sim_core_read_unaligned_4 (cpu, riscv_cpu->pc, read_map,
-					       riscv_cpu->regs[rs1]));
+    tmp = EXTEND32 (sim_core_read_aligned_4 (cpu, riscv_cpu->pc, read_map,
+					     riscv_cpu->regs[rs1]));
   store_rd (cpu, rd, tmp);
 
-  switch (op->match)
+  switch (op->match & ~mask_aqrl)
     {
     case MATCH_AMOADD_D:
     case MATCH_AMOADD_W:
@@ -931,25 +945,37 @@  execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
       tmp = riscv_cpu->regs[rd] & riscv_cpu->regs[rs2];
       break;
     case MATCH_AMOMAX_D:
-    case MATCH_AMOMAX_W:
       tmp = max ((signed_word) riscv_cpu->regs[rd],
 		 (signed_word) riscv_cpu->regs[rs2]);
       break;
+    case MATCH_AMOMAX_W:
+      tmp = max ((int32_t) riscv_cpu->regs[rd],
+		 (int32_t) riscv_cpu->regs[rs2]);
+      break;
     case MATCH_AMOMAXU_D:
-    case MATCH_AMOMAXU_W:
       tmp = max ((unsigned_word) riscv_cpu->regs[rd],
 		 (unsigned_word) riscv_cpu->regs[rs2]);
       break;
+    case MATCH_AMOMAXU_W:
+      tmp = max ((uint32_t) riscv_cpu->regs[rd],
+		 (uint32_t) riscv_cpu->regs[rs2]);
+      break;
     case MATCH_AMOMIN_D:
-    case MATCH_AMOMIN_W:
       tmp = min ((signed_word) riscv_cpu->regs[rd],
 		 (signed_word) riscv_cpu->regs[rs2]);
       break;
+    case MATCH_AMOMIN_W:
+      tmp = min ((int32_t) riscv_cpu->regs[rd],
+		 (int32_t) riscv_cpu->regs[rs2]);
+      break;
     case MATCH_AMOMINU_D:
-    case MATCH_AMOMINU_W:
       tmp = min ((unsigned_word) riscv_cpu->regs[rd],
 		 (unsigned_word) riscv_cpu->regs[rs2]);
       break;
+    case MATCH_AMOMINU_W:
+      tmp = min ((uint32_t) riscv_cpu->regs[rd],
+		 (uint32_t) riscv_cpu->regs[rs2]);
+      break;
     case MATCH_AMOOR_D:
     case MATCH_AMOOR_W:
       tmp = riscv_cpu->regs[rd] | riscv_cpu->regs[rs2];
@@ -968,11 +994,11 @@  execute_a (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
     }
 
   if (op->xlen_requirement == 64)
-    sim_core_write_unaligned_8 (cpu, riscv_cpu->pc, write_map,
-				riscv_cpu->regs[rs1], tmp);
+    sim_core_write_aligned_8 (cpu, riscv_cpu->pc, write_map,
+			      riscv_cpu->regs[rs1], tmp);
   else
-    sim_core_write_unaligned_4 (cpu, riscv_cpu->pc, write_map,
-				riscv_cpu->regs[rs1], tmp);
+    sim_core_write_aligned_4 (cpu, riscv_cpu->pc, write_map,
+			      riscv_cpu->regs[rs1], tmp);
 
  done:
   return pc;
@@ -1307,7 +1333,15 @@  execute_one (SIM_CPU *cpu, unsigned_word iw, const struct riscv_opcode *op)
   switch (op->insn_class)
     {
     case INSN_CLASS_A:
-      return execute_a (cpu, iw, op);
+      /* Check whether model with A extension is selected.  */
+      if (riscv_cpu->csr.misa & 1)
+	return execute_a (cpu, iw, op);
+      else
+	{
+	  TRACE_INSN (cpu, "UNHANDLED EXTENSION: %d", op->insn_class);
+	  sim_engine_halt (sd, cpu, NULL, riscv_cpu->pc, sim_signalled,
+			   SIM_SIGILL);
+	}
     case INSN_CLASS_C:
       /* Check whether model with C extension is selected.  */
       if (riscv_cpu->csr.misa & 4)