`timescale 1ns / 1ps  // 31.8.2018
// Soft CPU with byte access, interrupt, and floating-point
//----------------------------------------------------------------------------------
// RISC5 with interrupt and floating-point. Module Name: RISC5_int_float.v
// Original engineer: Niklaus Wirth. Original release: Aug/31/2018
// Comments and cleanup: WS May/08/2022
// Target Devices: any FPGA
// Tool Versions: Vivado (Artix-7) or ISE (Artix-7 100T and 200T, Spartan-6)
// Notes:
// 1. Byte access muxing is done in this module.
// 2. Interrupt vector is located at mem address 4 (word address 1).
//    [RISC5.Update.pdf page 2.]
// 3. 24 bits of byte address (22 bits of word address) are hardcoded in this module.
// 4. StartAdr is explained in PO.Computer page 12. The boot PROM is mapped at StartAdr.
//    StartAdr is a module parameter; the default reproduces the original hard-coded
//    value, so instantiations that do not override it behave as before.
//    "On startup, the flag PMsel is set and IR is loaded from pmout (from the BRAM)
//    at StartAdr."
// 5. Version register is badly needed. The robust mechanism should be provided.
//----------------------------------------------------------------------------------
module RISC5 #(
  // Word address loaded into PC while rst is LOW (boot address at startup,
  // PO.Computer.pdf page 12). Byte address on adr is {StartAdr, 2'b00},
  // i.e. 24'hFFE000 for the default value.
  parameter [21:0] StartAdr = 22'h3FF800
) (
  input         clk,      // clock 25 MHz in ASRAM environment, 50+ MHz otherwise
  input         rst,      // reset, active LOW (PC is forced to StartAdr while ~rst)
  input         irq,      // interrupt request, rising-edge detected; ISR at byte address 4
  input         stallX,   // separate bus controller is stealing CPU cycles
  input  [31:0] inbus,    // data bus in
  input  [31:0] codebus,  // instruction bus in
  output [23:0] adr,      // hardcoded address range 24 bits (byte address)
  output        rd,       // read strobe active HIGH
  output        wr,       // write strobe active HIGH
  output        ben,      // byte access request strobe active HIGH; byte selected by adr[1:0]
  output [31:0] outbus    // data bus out
  // output [31:0] version // lower half: date and daily revision. Upper: 4 ASCII characters.
);

reg [21:0] PC;        // program counter (word address)
reg [31:0] IR;        // instruction register
reg N, Z, C, OV;      // condition flags: negative, zero, carry, overflow
reg [31:0] H;         // aux register: upper product half after MUL, remainder after DIV
reg stallL1;          // registered stallL0 (held while stallX): marks the second
                      // cycle of a load/store

wire [21:0] pcmux, pcmux0;  // muxes for the next PC source
wire [21:0] nxpc;           // address of the next instruction: PC + 1
wire cond, S;
wire sa, sb, sc;            // sign bits of aluRes, B, C1 (used for carry/overflow)
wire p, q, u, v;            // instruction modifier bits IR[31:28]
wire [3:0] op;              // operation code from IR

// Register numbers for the three ports of the register bank (PO.Computer page 3).
// As connected below, the bank has one write-data input (din = regmux) and three
// read outputs: dout0 = A, dout1 = B, dout2 = C0.
wire [3:0] ira, ira0, irb, irc;
wire [2:0] cc;              // condition code field of branch instructions

// Instruction fields for jumps or memory access. RISC.pdf page 4 and 5.
wire [15:0] imm;   // Instr. format F1: immediate value in the instruction itself
wire [19:0] off;   // Instr. format F2: offset for load/store address
wire [21:0] disp;  // Instr. format F3: relative jump in memory (it was 24 bits in 2015)

wire regwr;        // write strobe to Register Bank, active HIGH

// stall signals from various IP cores (e.g., multiplier)
wire stall, stallL0, stallM, stallD, stallFA, stallFM, stallFD;
wire nn, zz, cx, vv;  // next values of flags N, Z, C, OV

// objects for interrupt handling; see RISC5.Update.pdf
reg irq1;             // previous irq sample, for interrupt edge detection
reg intEnb, intPnd;   // interrupt enable, interrupt pending
reg intMd;            // CPU is in interrupt mode
reg [25:0] SPC;       // saved {N, Z, C, OV, PC} on interrupt
wire intAck;          // interrupt acknowledge (wire, not register)

wire [31:0] inbus1;   // ben=LOW --> full word from inbus; HIGH --> one selected byte
wire [31:0] aluRes;   // ALU result
wire [31:0] A;        // register bank output dout0 (register ira0); drives outbus
wire [31:0] B, C0, C1;  // B, C0: register bank outputs; C1: C0 or extended immediate
wire [31:0] regmux;   // data written into the register bank (din)
wire [31:0] lshout, rshout;       // left and right bit shift
wire [31:0] quotient, remainder;  // fixed point division
wire [63:0] product;              // fixed point multiplication
wire [31:0] fsum, fprod, fquot;   // floating point objects

wire ADD, SUB, MUL, DIV;  // instruction alias wires...
wire FAD, FSB, FML, FDV;  // ...with logic equations...
wire LDR, STR, BR, RTI;   // ...but not used in the ALU

Registers regs (.clk(clk), .wr(regwr), .rno0(ira0), .rno1(irb),
    .rno2(irc), .din(regmux), .dout0(A), .dout1(B), .dout2(C0));

Multiplier mulUnit (.clk(clk), .run(MUL), .stall(stallM),
    .u(~u), .x(B), .y(C1), .z(product));

Divider divUnit (.clk(clk), .run(DIV), .stall(stallD),
    .u(~u), .x(B), .y(C1), .quot(quotient), .rem(remainder));

LeftShifter LSUnit (.x(B), .y(lshout), .sc(C1[4:0]));

RightShifter RSUnit(.x(B), .y(rshout), .sc(C1[4:0]), .md(IR[16]));

// For FSB the sign bit of the second operand is inverted, so the adder subtracts.
FPAdder fpaddx (.clk(clk), .run(FAD|FSB), .u(u), .v(v), .stall(stallFA),
    .x(B), .y({FSB^C0[31], C0[30:0]}), .z(fsum));

FPMultiplier fpmulx (.clk(clk), .run(FML), .stall(stallFM),
    .x(B), .y(C0), .z(fprod));

FPDivider fpdivx (.clk(clk), .run(FDV), .stall(stallFD),
    .x(B), .y(C0), .z(fquot));

// mnemonics for register instruction bit fields; RISC.pdf page 5
assign p = IR[31];
assign q = IR[30];
assign u = IR[29];
assign v = IR[28];
assign cc = IR[26:24];
assign op = IR[19:16];   // instruction code bits
assign ira = IR[27:24];  // register number for port 0 (write target / dout0 = A)
assign irb = IR[23:20];  // register number read through port B
assign irc = IR[3:0];    // register number read through port C0

// RISC.pdf page 4; sections 3.2 and 3.3
// Instruction bit fields give an immediate value, a load/store offset, or a
// branch displacement.
// Memory load/store format F2: offset field = 20 bits. It is sign-extended and
// added unshifted to the byte address B[23:0] (see adr below).
// Branch instruction format F3: displacement field = 22 bits as coded here
// (24 bits were used in 2015). It is added to the word address nxpc.
assign imm = IR[15:0];   // reg instr, format F1; immediate value from instruction itself
assign off = IR[19:0];   // mem instr, format F2; signed offset added to B[23:0]
assign disp = IR[21:0];  // branch instr, format F3; word displacement; was 24 bits in 2015

// Instruction mnemonics also provide logic equations at the same time.
// The logic equations are decoding the instructions --> single bit each.
// These single bits are then used to choose only one path while executing
// the instruction. Effectively, these equations form a CASE statement,
// but it is coded combinatorially rather than in the clocked "always block".
// These equations are NOT used in the ALU result mux (aluRes decodes op directly).
assign ADD = ~p & (op == 8);
assign SUB = ~p & (op == 9);
assign MUL = ~p & (op == 10);
assign DIV = ~p & (op == 11);
assign FAD = ~p & (op == 12);
assign FSB = ~p & (op == 13);
assign FML = ~p & (op == 14);
assign FDV = ~p & (op == 15);
assign LDR = p & ~q & ~u;
assign STR = p & ~q & u;
assign BR = p & q;
assign RTI = BR & ~u & ~v & IR[4];  // Return from Interrupt; see RISC5.Update.pdf

assign ira0 = BR ? 4'd15 : ira;           // branches use register 15 on port 0 (link)
assign C1 = q ? {{16{v}}, imm} : C0;      // second operand: extended immediate or C0

// adr == external address out
// During the first cycle of a load/store (stallL0) it is the data address,
// otherwise it is the byte address of the next instruction.
// The 24 bit address width is enforced here; widening the address range
// requires changing this expression together with PC/pcmux/SPC widths.
assign adr = stallL0 ? B[23:0] + {{4{off[19]}}, off} : {pcmux, 2'b00};

// Strobes active HIGH: read, write, individual byte access request.
// Strobes are issued only in the first cycle of a load/store (~stallL1)
// and when the external bus controller is not stalling (~stallX).
assign rd = LDR & ~stallX & ~stallL1;          // read strobe
assign wr = STR & ~stallX & ~stallL1;          // write strobe
assign ben = p & ~q & v & ~stallX & ~stallL1;  // byte enable strobe

// Arithmetic-logical unit (ALU) interprets most instructions.
// PO.Computer page 2. Section 16.2. RISC-Arch-1.pdf page 1.
// ALU operates in parallel with the Control Unit, see below.
// The operand is either a register or a part of the instruction itself
// ---------------------------------------------------------------------
// Register or instruction part --> ALU --> register via regmux.
// This code is very different from the code in PO.Computer.pdf page 4.
// Instruction mnemonics are not used. The mnemonics are a leftover from 2015.
// Paul Reed explained that the new code is better balanced timing wise.
// 4 bits op = IR[19:16]. See RISC-Arch-1.pdf rev. 9.8.2018, page 1.
assign aluRes =  // assigned to a CPU register, passing via regmux.
  ~op[3] ?
    (~op[2] ?
      (~op[1] ?
        (~op[0] ?
          (q ?  // MOV
            (~u ? {{16{v}}, imm} : {imm, 16'b0}) :
            (~u ? C0 : (~v ? H : {N, Z, C, OV, 20'b0, 8'h53}))) :
          lshout) :  // LSL
        rshout) :  // ASR, ROR
      (~op[1] ?
        (~op[0] ? B & C1 : B & ~C1) :  // AND, ANN
        (~op[0] ? B | C1 : B ^ C1))) :  // IOR, XOR
    (~op[2] ?
      (~op[1] ?
        (~op[0] ? B + C1 + (u&C) : B - C1 - (u&C)) :  // ADD, SUB
        (~op[0] ? product[31:0] : quotient)) :  // MUL, DIV
      (~op[1] ?  // flt.pt.
        fsum :
        (~op[0] ? fprod : fquot)));

// Internal register bank. PO.Computer.pdf page 4.
// regwr  = store or do not store the result to the register bank
// regmux = choose input to the bank among ALU, data bus, or next program address nxpc
// ------------------------------------------------------------------------------
assign regwr = ~p & ~stall | (LDR & ~stallX & ~stallL1) |
               (BR & cond & v & ~stallX);
assign regmux =
  LDR ? inbus1 :                      // load: full word or one selected byte
  (BR & v) ? {8'b0, nxpc, 2'b0} :     // branch-and-link: return byte address
  aluRes;                             // everything else: ALU result

// Byte access muxing; ben == byte access flag, active HIGH despite "n" at the end
// ben==LOW --> word access; otherwise assign one byte to the lowest 8 bits
// PO.Computer, page 14, Figure 17.3 Multiplexers for SRAM byte access
// ------------------------------------------------------------------------------
assign inbus1 = ~ben ? inbus :  // ben == LOW --> full word access w/o muxing
  // ben == HIGH: pick the byte lane selected by the two lowest address bits.
  {24'b0, (adr[1] ? (adr[0] ? inbus[31:24] : inbus[23:16]) :
                    (adr[0] ? inbus[15:8]  : inbus[7:0]))};

// On a byte store the low byte of A is placed on the lane selected by adr[1:0].
assign outbus = ~ben ? A :
  adr[1] ? (adr[0] ? {A[7:0], 24'b0} : {8'b0, A[7:0], 16'b0}) :
           (adr[0] ? {16'b0, A[7:0], 8'b0} : {24'b0, A[7:0]});

// Control unit CU operates in parallel to ALU. PO.Computer.pdf page 14
// CU jumps from the current to the next instruction (not always sequentially)
// While ALU executes the current instruction, CU fetches the next one.
// Four possible next instruction sources: PO.Computer page 10.
// --------------------------------------------------------------------
// Program Counter PC = word address used to fetch instructions.
// Instr. Register IR = current instruction itself.
// --------------------------------------------------------------------
assign S = N ^ OV;
assign nxpc = PC + 1;  // speculatively advance program address to next address
assign cond = IR[27] ^
  ((cc == 0) & N |      // MI, PL
   (cc == 1) & Z |      // EQ, NE
   (cc == 2) & C |      // CS, CC
   (cc == 3) & OV |     // VS, VC
   (cc == 4) & (C|Z) |  // LS, HI
   (cc == 5) & S |      // LT, GE
   (cc == 6) & (S|Z) |  // LE, GT
   (cc == 7));          // T, F

assign intAck = intPnd & intEnb & ~intMd & ~stall;  // interrupt acknowledge

// Choose the next instruction address. Priority: reset, stall (hold PC),
// interrupt entry (word address 1 == byte address 4), return from interrupt
// (PC part of SPC), and finally the normal/branch target pcmux0.
assign pcmux = (~rst | stall | intAck | RTI) ?
  ((~rst | stall) ? (~rst ? StartAdr : PC) :
                    (intAck ? 22'd1 : SPC[21:0])) :
  pcmux0;

// Register-indirect branches take the target from C0. C0 holds a byte address
// (the link value is written as {8'b0, nxpc, 2'b0}), so bits [23:2] are the
// 22-bit word address for PC.
assign pcmux0 = (BR & cond) ? (u ? nxpc + disp : C0[23:2]) : nxpc;

assign sa = aluRes[31];
assign sb = B[31];
assign sc = C1[31];

// Next flag values. RTI restores all four flags from the upper bits of SPC.
assign nn = RTI ? SPC[25] : regwr ? regmux[31] : N;
assign zz = RTI ? SPC[24] : regwr ? (regmux == 0) : Z;
assign cx = RTI ? SPC[23] :
  ADD ? (~sb&sc&~sa) | (sb&sc&sa) | (sb&~sa) :
  SUB ? (~sb&sc&~sa) | (sb&sc&sa) | (~sb&sa) : C;
assign vv = RTI ? SPC[22] :
  ADD ? (sa&~sb&~sc) | (~sa&sb&sc) :
  SUB ? (sa&~sb&sc) | (~sa&sb&~sc) : OV;

assign stallL0 = (LDR|STR) & ~stallL1;  // first cycle of load or store instructions

// stall the CPU for whatever reason; any IP core can force the stall
assign stall = stallL0 | stallM | stallD | stallX | stallFA | stallFM | stallFD;

always @ (posedge clk) begin
  PC <= pcmux;                  // next instruction address from one of many sources
  IR <= stall ? IR : codebus;   // load IR from codebus when CPU is not stalled
  stallL1 <= stallX ? stallL1 : stallL0;
  N <= nn;
  Z <= zz;
  C <= cx;
  OV <= vv;

  // load aux register H with either MUL or DIV result
  H <= MUL ? product[63:32] : DIV ? remainder : H;

  // interrupt handling; RISC5.Update page 2
  irq1 <= irq;  // edge detector
  intPnd <= rst & ~intAck & ((~irq1 & irq) | intPnd);
  intMd <= rst & ~RTI & (intAck | intMd);
  intEnb <= ~rst ? 0 : (BR & ~u & ~v & IR[5]) ? IR[0] : intEnb;

  // SPC == saved flags and PC on interrupt; RISC5.Update page 2
  SPC <= (intAck) ? {nn, zz, cx, vv, pcmux0} : SPC;
end

endmodule