Attention : une mise à jour du serveur va être effectuée le lundi 17 mai entre 13h et 13h30. Cette mise à jour va générer une interruption du service de quelques minutes.

Commit e92dd219 authored by ROKICKI Simon

Merge branch 'cacheAndDiv'

parents 38b6d10d d546cd3a
......@@ -64,7 +64,7 @@ Comet implements the 32bit ISA with the M extension (hardware multiply) and has
The build process needs to be configured accordingly.
```
-./configure --prefix=$RISCV --with-arch=rv32i --with-abi=ilp32
+./configure --prefix=$RISCV --with-arch=rv32im --with-abi=ilp32
make
```
......
/*
* alu.h
*
* Created on: 17 avr. 2019
* Author: simon
*/
#ifndef INCLUDE_ALU_H_
#define INCLUDE_ALU_H_
#include <riscvISA.h>
#include <pipelineRegisters.h>
//#include <cstdio>
/**
 * Abstract interface for an execute-stage functional unit.
 *
 * A concrete unit receives the decode-to-execute register and may produce a
 * 32-bit result and/or request a pipeline stall.
 */
class ALU {
protected:
    // NOTE(review): not read or written by any code visible in this header.
    bool wait;
public:
    // Polymorphic base: make destruction through an ALU pointer well defined.
    virtual ~ALU() = default;

    /**
     * Process the instruction held in dctoEx.
     *
     * @param dctoEx decode-to-execute pipeline register (passed by value)
     * @param result output value; implementations write it when they have a result
     * @param stall  stall request flag; implementations may raise it
     * @return in the MultAlu implementation of this file, true exactly when
     *         result was written during this call
     */
    virtual bool process(struct DCtoEx dctoEx, ac_int<32, false> &result, bool &stall) = 0;
};
class BasicAlu {
public:
void process(struct DCtoEx dctoEx, struct ExtoMem &extoMem, bool &stall){
stall = false;
extoMem.pc = dctoEx.pc;
extoMem.opCode = dctoEx.opCode;
extoMem.rd = dctoEx.rd;
extoMem.funct3 = dctoEx.funct3;
extoMem.we = dctoEx.we;
extoMem.isBranch = 0;
extoMem.useRd = dctoEx.useRd;
extoMem.isLongInstruction = 0;
extoMem.instruction = dctoEx.instruction;
ac_int<13, false> imm13 = 0;
imm13[12] = dctoEx.instruction[31];
imm13.set_slc(5, dctoEx.instruction.slc<6>(25));
imm13.set_slc(1, dctoEx.instruction.slc<4>(8));
imm13[11] = dctoEx.instruction[7];
ac_int<13, true> imm13_signed = 0;
imm13_signed.set_slc(0, imm13);
ac_int<5, false> shamt = dctoEx.instruction.slc<5>(20);
// switch must be in the else, otherwise external op may trigger default case
switch(dctoEx.opCode)
{
case RISCV_LUI:
extoMem.result = dctoEx.lhs;
break;
case RISCV_AUIPC:
extoMem.result = dctoEx.lhs + dctoEx.rhs;
break;
case RISCV_JAL:
//Note: in current version, the addition is made in the decode stage
//The value to store in rd (pc+4) is stored in lhs
extoMem.result = dctoEx.lhs;
break;
case RISCV_JALR:
//Note: in current version, the addition is made in the decode stage
//The value to store in rd (pc+4) is stored in lhs
extoMem.nextPC = dctoEx.rhs + dctoEx.lhs;
extoMem.isBranch = 1;
extoMem.result = dctoEx.pc+4;
break;
case RISCV_BR:
extoMem.nextPC = extoMem.pc + imm13_signed;
switch(dctoEx.funct3)
{
case RISCV_BR_BEQ:
extoMem.isBranch = (dctoEx.lhs == dctoEx.rhs);
break;
case RISCV_BR_BNE:
extoMem.isBranch = (dctoEx.lhs != dctoEx.rhs);
break;
case RISCV_BR_BLT:
extoMem.isBranch = (dctoEx.lhs < dctoEx.rhs);
break;
case RISCV_BR_BGE:
extoMem.isBranch = (dctoEx.lhs >= dctoEx.rhs);
break;
case RISCV_BR_BLTU:
extoMem.isBranch = ((ac_int<32, false>)dctoEx.lhs < (ac_int<32, false>)dctoEx.rhs);
break;
case RISCV_BR_BGEU:
extoMem.isBranch = ((ac_int<32, false>)dctoEx.lhs >= (ac_int<32, false>)dctoEx.rhs);
break;
}
break;
case RISCV_LD:
extoMem.isLongInstruction = 1;
extoMem.result = dctoEx.lhs + dctoEx.rhs;
break;
case RISCV_ST:
extoMem.datac = dctoEx.datac;
extoMem.result = dctoEx.lhs + dctoEx.rhs;
break;
case RISCV_OPI:
switch(dctoEx.funct3)
{
case RISCV_OPI_ADDI:
extoMem.result = dctoEx.lhs + dctoEx.rhs;
break;
case RISCV_OPI_SLTI:
extoMem.result = dctoEx.lhs < dctoEx.rhs;
break;
case RISCV_OPI_SLTIU:
extoMem.result = (ac_int<32, false>)dctoEx.lhs < (ac_int<32, false>)dctoEx.rhs;
break;
case RISCV_OPI_XORI:
extoMem.result = dctoEx.lhs ^ dctoEx.rhs;
break;
case RISCV_OPI_ORI:
extoMem.result = dctoEx.lhs | dctoEx.rhs;
break;
case RISCV_OPI_ANDI:
extoMem.result = dctoEx.lhs & dctoEx.rhs;
break;
case RISCV_OPI_SLLI: // cast rhs as 5 bits, otherwise generated hardware is 32 bits
// & shift amount held in the lower 5 bits (riscv spec)
extoMem.result = dctoEx.lhs << (ac_int<5, false>)dctoEx.rhs;
break;
case RISCV_OPI_SRI:
if (dctoEx.funct7.slc<1>(5)) //SRAI
extoMem.result = dctoEx.lhs >> (ac_int<5, false>)shamt;
else //SRLI
extoMem.result = (ac_int<32, false>)dctoEx.lhs >> (ac_int<5, false>)shamt;
break;
}
break;
case RISCV_OP:
if(dctoEx.funct7.slc<1>(0)) // M Extension
{
}
else{
switch(dctoEx.funct3){
case RISCV_OP_ADD:
if (dctoEx.funct7.slc<1>(5)) // SUB
extoMem.result = dctoEx.lhs - dctoEx.rhs;
else // ADD
extoMem.result = dctoEx.lhs + dctoEx.rhs;
break;
case RISCV_OP_SLL:
extoMem.result = dctoEx.lhs << (ac_int<5, false>)dctoEx.rhs;
break;
case RISCV_OP_SLT:
extoMem.result = dctoEx.lhs < dctoEx.rhs;
break;
case RISCV_OP_SLTU:
extoMem.result = (ac_int<32, false>)dctoEx.lhs < (ac_int<32, false>)dctoEx.rhs;
break;
case RISCV_OP_XOR:
extoMem.result = dctoEx.lhs ^ dctoEx.rhs;
break;
case RISCV_OP_SR:
if(dctoEx.funct7.slc<1>(5)) // SRA
extoMem.result = dctoEx.lhs >> (ac_int<5, false>)dctoEx.rhs;
else // SRL
extoMem.result = (ac_int<32, false>)dctoEx.lhs >> (ac_int<5, false>)dctoEx.rhs;
break;
case RISCV_OP_OR:
extoMem.result = dctoEx.lhs | dctoEx.rhs;
break;
case RISCV_OP_AND:
extoMem.result = dctoEx.lhs & dctoEx.rhs;
break;
}
}
break;
case RISCV_MISC_MEM: // this does nothing because all memory accesses are ordered and we have only one core
break;
case RISCV_SYSTEM:
switch(dctoEx.funct3)
{ // case 0: mret instruction, dctoEx.memValue should be 0x302
case RISCV_SYSTEM_ENV:
#ifndef __HLS__
//TODO handling syscall correctly
//extoMem.result = sim->solveSyscall(dctoEx.lhs, dctoEx.rhs, dctoEx.datac, dctoEx.datad, dctoEx.datae, exit);
#endif
break;
case RISCV_SYSTEM_CSRRW: // lhs is from csr, rhs is from reg[rs1]
extoMem.datac = dctoEx.rhs; // written back to csr
extoMem.result = dctoEx.lhs; // written back to rd
break;
case RISCV_SYSTEM_CSRRS:
extoMem.datac = dctoEx.lhs | dctoEx.rhs;
extoMem.result = dctoEx.lhs;
break;
case RISCV_SYSTEM_CSRRC:
extoMem.datac = dctoEx.lhs & ((ac_int<32, false>)~dctoEx.rhs);
extoMem.result = dctoEx.lhs;
break;
case RISCV_SYSTEM_CSRRWI:
extoMem.datac = dctoEx.rhs;
extoMem.result = dctoEx.lhs;
break;
case RISCV_SYSTEM_CSRRSI:
extoMem.datac = dctoEx.lhs | dctoEx.rhs;
extoMem.result = dctoEx.lhs;
break;
case RISCV_SYSTEM_CSRRCI:
extoMem.datac = dctoEx.lhs & ((ac_int<32, false>)~dctoEx.rhs);
extoMem.result = dctoEx.lhs;
break;
}
break;
}
//If the instruction was dropped, we ensure that isBranch is at zero
if (!dctoEx.we){
extoMem.isBranch = 0;
extoMem.useRd = 0;
}
}
};
class MultAlu: public ALU {
public:
ac_int<32, false> quotient, remainder;
//ac_int<33, false>
ac_int<6, false> state = 0;
bool resIsNeg;
int i;
ac_int<32, false> dataAUnsigned, dataBUnsigned;
bool process(struct DCtoEx dctoEx, ac_int<32, false> &result, bool &stall){
//no need to fill in the output register fields, the first ALU has that taken care of
bool valRet = false;
if (dctoEx.opCode == RISCV_OP && dctoEx.funct7 == RISCV_OP_M) {
if (state == 0) {
dataAUnsigned.set_slc(0, dctoEx.lhs);
dataBUnsigned.set_slc(0, dctoEx.rhs);
//mult results
ac_int<32, false> resultU = dataAUnsigned * dataBUnsigned;
ac_int<32, false> resultS = dctoEx.lhs * dctoEx.rhs;
ac_int<32, false> resultSU = dctoEx.lhs * dataBUnsigned;
resIsNeg = dctoEx.lhs[31] ^ dctoEx.rhs[31];
switch (dctoEx.funct3){
case RISCV_OP_M_MUL:
result = resultS.slc<32>(0);
valRet = true;
break;
case RISCV_OP_M_MULH:
result = resultS.slc<32>(32);
valRet = true;
break;
case RISCV_OP_M_MULHSU:
result = resultSU.slc<32>(32);
valRet = true;
break;
case RISCV_OP_M_MULHU:
result = resultU.slc<32>(32);
valRet = true;
break;
case RISCV_OP_M_DIV:
if(dctoEx.lhs[31]) {
dataAUnsigned = -dctoEx.lhs;
}
if(dctoEx.rhs[31]) {
dataBUnsigned = -dctoEx.rhs;
}
//printf("Dividing %d by %d\n", dataAUnsigned, dataBUnsigned);
case RISCV_OP_M_DIVU:
if(dataBUnsigned == 0) {
result = -1;
valRet = true;
}
else {
state = 32;
quotient = 0;
remainder = 0;
}
break;
case RISCV_OP_M_REM:
if(dctoEx.lhs[31]) {
dataAUnsigned = -dctoEx.lhs;
}
if(dctoEx.rhs[31]) {
dataBUnsigned = -dctoEx.rhs;
}
//printf("Moduling %d by %d\n", dataAUnsigned, dataBUnsigned);
case RISCV_OP_M_REMU:
if(dataBUnsigned == 0) {
result = dataAUnsigned;
}
else {
state = 32;
quotient = 0;
remainder = 0;
}
break;
}
}
else {
//Loop for the division
for(i = 0; i < 4; i++)
{
state--;
remainder = remainder << 1;
remainder[0] = dataAUnsigned[state];
if(remainder >= dataBUnsigned) {
remainder = remainder - dataBUnsigned;
quotient[state] = 1;
}
}
//printf("Quotient : %d, Remainder : %d\n", quotient, remainder);
if(state == 0) {
switch(dctoEx.funct3) {
case RISCV_OP_M_DIV:
if(resIsNeg)
result = -quotient;
else
result = quotient;
valRet = true;
break;
case RISCV_OP_M_DIVU:
result = quotient;
valRet = true;
break;
case RISCV_OP_M_REM:
if(dataAUnsigned[31])
result = -remainder;
else
result = remainder;
valRet = true;
break;
case RISCV_OP_M_REMU:
result = remainder;
valRet = true;
break;
}
//printf("result : %d\n", extoMem.result);
}
}
stall |= (state != 0);
}
return valRet;
}
};
#endif /* INCLUDE_ALU_H_ */
......@@ -34,9 +34,15 @@
//template<int OFFSET_SIZE, int TAG_SIZE, int SET_SIZE, int ASSOCIATIVITY>
class CacheMemory: public MemoryInterface {
public:
#ifdef __HLS__
IncompleteMemory *nextLevel;
#else
MemoryInterface *nextLevel;
#endif
ac_int<TAG_SIZE+LINE_SIZE*8, false> cacheMemory[SET_SIZE][ASSOCIATIVITY];
ac_int<16, false> age[SET_SIZE][ASSOCIATIVITY];
ac_int<40, false> age[SET_SIZE][ASSOCIATIVITY];
ac_int<1, false> dataValid[SET_SIZE][ASSOCIATIVITY];
......@@ -49,6 +55,16 @@ public:
memOpType nextLevelOpType;
ac_int<32, false> nextLevelDataIn;
ac_int<32, false> nextLevelDataOut;
ac_int<40, false> cycle;
ac_int<LOG_ASSOCIATIVITY, false> setMiss;
bool isValid;
bool wasStore = false;
ac_int<LOG_ASSOCIATIVITY, false> setStore;
ac_int<LOG_SET_SIZE, false> placeStore;
ac_int<LINE_SIZE*8+TAG_SIZE, false> valStore;
ac_int<32, false> dataOutStore;
bool nextLevelWaitOut;
bool VERBOSE = false;
......@@ -58,7 +74,11 @@ public:
#ifdef __HLS__
CacheMemory(IncompleteMemory *nextLevel, bool v){
#else
CacheMemory(MemoryInterface *nextLevel, bool v){
#endif
this->nextLevel = nextLevel;
for (int oneSetElement = 0; oneSetElement<SET_SIZE; oneSetElement++){
for (int oneSet = 0; oneSet < ASSOCIATIVITY; oneSet++){
......@@ -80,191 +100,242 @@ public:
ac_int<TAG_SIZE, false> tag = addr.slc<TAG_SIZE>(LOG_LINE_SIZE + LOG_SET_SIZE); // startAddress is log(lineSize) + log(setSize) + 2
ac_int<LOG_LINE_SIZE, false> offset = addr.slc<LOG_LINE_SIZE-2>(2); //bitSize is log(lineSize), start address is 2(because of #bytes in a word)
if (!nextLevelWaitOut && opType != NONE){
if (cacheState == 0){
numberAccess++;
// fprintf(stderr, "Reading at addr %x\n", addr);
if (!nextLevelWaitOut){
cycle++;
if (wasStore || cacheState == 1){
cacheMemory[placeStore][setStore] = valStore;
age[placeStore][setStore] = cycle;
dataValid[placeStore][setStore] = 1;
dataOut = dataOutStore;
wasStore = false;
cacheState = 0;
}
else if (opType != NONE){
ac_int<LINE_SIZE*8+TAG_SIZE, false> val1 = cacheMemory[place][0];
ac_int<LINE_SIZE*8+TAG_SIZE, false> val2 = cacheMemory[place][1];
ac_int<LINE_SIZE*8+TAG_SIZE, false> val3 = cacheMemory[place][2];
ac_int<LINE_SIZE*8+TAG_SIZE, false> val4 = cacheMemory[place][3];
ac_int<TAG_SIZE, false> tag1 = val1.slc<TAG_SIZE>(0);
ac_int<TAG_SIZE, false> tag2 = val2.slc<TAG_SIZE>(0);
ac_int<TAG_SIZE, false> tag3 = val3.slc<TAG_SIZE>(0);
ac_int<TAG_SIZE, false> tag4 = val4.slc<TAG_SIZE>(0);
ac_int<1, false> valid1 = dataValid[place][0];
ac_int<1, false> valid2 = dataValid[place][1];
ac_int<1, false> valid3 = dataValid[place][2];
ac_int<1, false> valid4 = dataValid[place][3];
bool hit1 = (tag1 == tag) && dataValid[place][0];
bool hit2 = (tag2 == tag) && dataValid[place][1];
bool hit3 = (tag3 == tag) && dataValid[place][2];
bool hit4 = (tag4 == tag) && dataValid[place][3];
ac_int<16, false> age1 = age[place][0];
ac_int<16, false> age2 = age[place][1];
ac_int<16, false> age3 = age[place][2];
ac_int<16, false> age4 = age[place][3];
bool hit = hit1 | hit2 | hit3 | hit4;
ac_int<LOG_ASSOCIATIVITY, false> set = 0;
ac_int<LINE_SIZE*8, false> selectedValue;
if (cacheState == 0){
numberAccess++;
// fprintf(stdout, "Reading at addr %x\n", addr);
if (hit1){
selectedValue = val1.slc<LINE_SIZE*8>(TAG_SIZE);
set = 0;
}
if (hit2){
selectedValue = val2.slc<LINE_SIZE*8>(TAG_SIZE);
set = 0;
}
ac_int<TAG_SIZE, false> tag1 = val1.slc<TAG_SIZE>(0);
ac_int<TAG_SIZE, false> tag2 = val2.slc<TAG_SIZE>(0);
ac_int<TAG_SIZE, false> tag3 = val3.slc<TAG_SIZE>(0);
ac_int<TAG_SIZE, false> tag4 = val4.slc<TAG_SIZE>(0);
if (hit3){
selectedValue = val3.slc<LINE_SIZE*8>(TAG_SIZE);
set = 0;
}
bool hit1 = (tag1 == tag) && valid1;
bool hit2 = (tag2 == tag) && valid2;
bool hit3 = (tag3 == tag) && valid3;
bool hit4 = (tag4 == tag) && valid4;
if (hit4){
selectedValue = val4.slc<LINE_SIZE*8>(TAG_SIZE);
set = 0;
}
bool hit = hit1 | hit2 | hit3 | hit4;
ac_int<LOG_ASSOCIATIVITY, false> set = 0;
ac_int<LINE_SIZE*8, false> selectedValue;
ac_int<TAG_SIZE, false> tag;
ac_int<8, true> signedByte;
ac_int<16, true> signedHalf;
ac_int<32, true> signedWord;
if (hit){
//First we handle the store
if (opType == STORE){
switch(mask) {
case BYTE:
cacheMemory[place][set].set_slc((((int) addr.slc<2>(0)) << 3) + TAG_SIZE + 4*8*offset, dataIn.slc<8>(0));
break;
case HALF:
cacheMemory[place][set].set_slc((addr[1] ? 16 : 0) + TAG_SIZE + 4*8*offset, dataIn.slc<16>(0));
break;
case WORD:
cacheMemory[place][set].set_slc(TAG_SIZE + 4*8*offset, dataIn);
break;
}
if (hit1){
selectedValue = val1.slc<LINE_SIZE*8>(TAG_SIZE);
tag = tag1;
set = 0;
}
else {
switch(mask) {
case BYTE:
signedByte = selectedValue.slc<8>((((int)addr.slc<2>(0)) << 3) + 4*8*offset);
signedWord = signedByte;
dataOut.set_slc(0, signedWord);
break;
case HALF:
signedHalf = selectedValue.slc<16>((addr[1] ? 16 : 0) + 4*8*offset);
signedWord = signedHalf;
dataOut.set_slc(0, signedWord);
break;
case WORD:
dataOut = selectedValue.slc<32>(4*8*offset);
break;
case BYTE_U:
dataOut = selectedValue.slc<8>((((int) addr.slc<2>(0))<<3) + 4*8*offset) & 0xff;
break;
case HALF_U:
dataOut = selectedValue.slc<16>((addr[1] ? 16 : 0) + 4*8*offset) & 0xffff;
break;
if (hit2){
selectedValue = val2.slc<LINE_SIZE*8>(TAG_SIZE);
tag = tag2;
set = 1;
}
if (hit3){
selectedValue = val3.slc<LINE_SIZE*8>(TAG_SIZE);
tag = tag3;
set = 2;
}
if (hit4){
selectedValue = val4.slc<LINE_SIZE*8>(TAG_SIZE);
tag = tag4;
set = 3;
}
ac_int<8, true> signedByte;
ac_int<16, true> signedHalf;
ac_int<32, true> signedWord;
if (hit){
ac_int<LINE_SIZE*8+TAG_SIZE, false> localValStore = 0;
localValStore.set_slc(TAG_SIZE, selectedValue);
localValStore.set_slc(0, tag);
//First we handle the store
if (opType == STORE){
switch(mask) {
case BYTE:
localValStore.set_slc((((int) addr.slc<2>(0)) << 3) + TAG_SIZE + 4*8*offset, dataIn.slc<8>(0));
break;
case HALF:
localValStore.set_slc((addr[1] ? 16 : 0) + TAG_SIZE + 4*8*offset, dataIn.slc<16>(0));
break;
case WORD:
localValStore.set_slc(TAG_SIZE + 4*8*offset, dataIn);
break;
}
placeStore = place;
setStore = set;
valStore = localValStore;
wasStore = true;
}
else {
switch(mask) {
case BYTE:
signedByte = selectedValue.slc<8>((((int)addr.slc<2>(0)) << 3) + 4*8*offset);
signedWord = signedByte;