`timescale 1ns / 1ps
// NW release 14.6.2018
// with ASRAM, and gpio
// PS/2 mouse and Nordic radio network 7.1.2014 PDR
/* Wojtek's To Do, May/7/2022
   1. Adopt a consistent capitalization convention in the code and in the PORT.
      Bad example: NEN, SDled, leds, btn, are all PORT signals connected to the FPGA pins.
   2. All active LOW signals shall use consistent suffixes. Choose one: _n, _b, or _bar.
   3. Implement switches and LEDs for RiskOne (via shift registers)
   4.
   5.
*/
// Top level FPGA pin assignments with HDMI by Magnus Karlsson for Pepino (I think)
// Risk... boards use ZBT rather than ASRAM
//
// RISC5Top: top level of the RISC5 system. It instantiates the clock generator, the
// RISC5 CPU core, the boot PROM, UART, SPI, video, PS/2 keyboard, mouse and DVI/HDMI
// cores, and implements the memory map (external RAM, boot PROM, System Device
// registers) plus the external RAM bus sharing between the CPU and the video core.
module RISC5Top (
  input         CLK50M,        // clock 50 MHz
  input         button,        // reset button?
  input  [3:0]  btn,           // other four buttons
  input  [7:0]  swi,           // switches
  input         RxD,           // RS-232 Rx
  output        TxD,           // RS-232 Tx
  output [7:0]  leds,          // diag LEDs; absent on RiskOne or RiskFive
  output        SDled,         // SD card LED
  output        SRce0, SRce1,  // SRAM Chip Enable strobes for two ASRAM chips
  output        SRwe,          // SRAM Write strobe, active LOW
  output        SRoe,          // SRAM Out Enable strobe, active LOW; not connected on Risk... boards
  output [3:0]  SRbe,          // individual byte enables for writes to the RAM chip, active LOW
  output [18:0] SRadr,         // SRAM word address; 19 bits of 32-bit words --> 2 MB address range
  inout  [31:0] SRdat,         // SRAM data
  input  [1:0]  MISO,          // SPI - SD card & wireless network
  output [1:0]  SCLK,          // SPI - SD card & wireless network
  output [1:0]  MOSI,          // SPI - SD card & wireless network
  output [1:0]  SS,            // SPI select, 2 devices - SD card & wireless network
  output        NEN,           // wireless network enable (in addition to SS select!)
  output [3:0]  TMDS,          // HDMI + signals
  output [3:0]  TMDSB,         // HDMI - signals
  input         PS2C, PS2D,    // keyboard
  inout         msclk, msdat,  // mouse
  inout  [7:0]  gpio);         // GPIO pins; absent on RiskOne
//---------- END of top level PORT ---------------------

reg [7:0] Lreg;       // local LED register
assign leds = Lreg;   // "leds" FPGA pins are connected to the local register
// On RiskOne we will send Lreg to an on-board shift register every millisecond

wire clkfbout, pllclk0, pllclk1, pllclk2, pllclk3;
wire pll_locked;
wire clk50;
wire pclk;            // buffered 75 MHz pixel clock (was an implicit net)
wire pclkx2;          // buffered 150 MHz HDMI clock (was an implicit net)

// The registers below have no reset term, so they are given explicit power-on values.
// Without them "clk <= ~clk" and "cnt0 + 1" stay X forever in simulation and the
// system never leaves reset. Zero is expected to equal the FPGA power-on default,
// so hardware behavior should be unchanged -- NOTE(review): confirm with the tool flow.
reg clk = 1'b0;            // clock 25 MHz created "by hand". This should come from the PLL
reg rst = 1'b0;            // reset signal, active LOW (registers below clear while rst == 0)

wire [23:0] adr;      // truncated local address, 24 bits w/o MSBs
wire [3:0]  iowadr;   // System Device register address (UART, SPI, etc.)
wire [31:0] inbus0;   // data bus from memory to multiplexer
wire [31:0] inbus;    // data bus from mux to RISC core
wire [31:0] outbus;   // data bus from RISC core to RAM, registers, etc
wire [31:0] romout;   // boot code to RISC core; muxed
wire [31:0] codebus;  // RAM code to RISC core; RAM muxed with boot BRAM
wire rd, wr;          // read and write strobes
wire ben;             // byte enable strobe while accessing individual bytes
wire ioenb;           // IO space for System Device registers (UART, SPI, etc.)
wire vidreq;          // video core is asking for access to RAM

wire [7:0] dataTx, dataRx, dataKbd;
wire rdyRx, doneRx, startTx, rdyTx, rdyKbd, doneKbd;
wire [27:0] dataMs;   // mouse data used by mouseP
reg bitrate;          // setting for UART
wire limit;           // Pulsed for one clock every 1 ms when cnt0 reaches its terminal count
reg [15:0] cnt0 = 16'd0;   // used to generate 1 ms pulses
reg [31:0] cnt1 = 32'd0;   // counts milliseconds

wire [31:0] spiRx;
wire spiStart, spiRdy;
reg [3:0] spiCtrl;
wire [17:0] vidadr;   // RAM address from video controller. Used to retrieve pixel data.
wire vde, vid, hsync, vsync;

reg [7:0] gpout, gpoc;  // GPIO. No use because RiskOne does not provide GPIO.
wire [7:0] gpin;

//---------------- Multiple clocks Clock Generator ----------------
// RiskOne: change input clock from 50 MHz to 25 MHz
// We should run the CPU at 50 MHz.
// How about System Devices? Do they depend on clock frequency?
PLL_BASE # (
  .CLKIN_PERIOD(20),     // 20 ns == 50 MHz  WRONG. RiskOne == 25 MHz  FIX ME
  .CLKFBOUT_MULT(15),    // 750 MHz base frequency
  .CLKOUT0_DIVIDE(1),    // 750 MHz
  .CLKOUT1_DIVIDE(10),   // 75 MHz pixel clock
  .CLKOUT2_DIVIDE(5),    // 150 MHz
  .CLKOUT3_DIVIDE(15),   // 50 MHz
  .COMPENSATION("INTERNAL")
) pll_blk (
  .CLKFBOUT(clkfbout),
  .CLKOUT0(pllclk0),     // 750 MHz
  .CLKOUT1(pllclk1),     // 75 MHz for video pixel clock
  .CLKOUT2(pllclk2),     // 150 MHz for HDMI
  .CLKOUT3(pllclk3),     // 50 MHz
  .CLKOUT4(),
  .CLKOUT5(),
  .LOCKED(pll_locked),
  .CLKFBIN(clkfbout),
  .CLKIN(CLK50M),        // RiskOne board clock = 25 MHz  FIX ME
  .RST(1'b0)
);

BUFG pclkbufg   (.I(pllclk1), .O(pclk));
BUFG pclkx2bufg (.I(pllclk2), .O(pclkx2));  // HDMI clock 150 MHz
BUFG clkbufg    (.I(pllclk3), .O(clk50));   // CPU clock 50 MHz

//---------------- RISC5 Soft CPU -----------------
// Bus diagram is in PO.Computer.pdf Figure 17.2, page 14.
// adr      out from CPU, 24-bit address bus. Bits [1:0] are addressing bytes.
// inbus0   from RAM to a mux, where it gets muxed with System Devices.
//          either data, or instructions from RAM (von Neumann architecture)
// inbus    either inbus0 or System Devices, depending on IO Enable "ioenb"
// codebus  from RAM or Boot PROM, muxed between the two. Used to fetch instructions.
// outbus   from CPU to RAM and System Devices. No need to mux.
// Boot memory address at startup is hard coded in the CPU code, PO.Computer.pdf page 12.
// This is bad practice. Should be rather made GENERIC.
RISC5 riscx(
  .clk(clk),          // main clock 25 MHz
  .rst(rst),          // reset
  .irq(limit),        // interrupt every 1 millisecond should be multisource  FIX ME.
  .rd(rd), .wr(wr),   // read and write strobes, active HIGH
  .ben(ben),          // byte access strobe active HIGH; bytes are addressed with adr[1:0]
  .stallX(vidreq),    // video controller is suspending the CPU cycles
  .adr(adr),          // out from CPU, 24-bit address bus; in future use more bits
  .codebus(codebus),  // data bus to CPU; fetch instructions; muxed between RAM and PROM
  .inbus(inbus),      // data bus to CPU; muxed between RAM and System Devices
  .outbus(outbus)     // data bus from CPU; to RAM and Devices; not muxed
);

// On-chip BRAM block named Program Memory (PM); used to store the boot loader.
// Not sure why it is using the inverted clock. Also, the inversion could be done with PLL.
// In the original FW the PROM held 512 instructions [511:0], 32-bits each.
// The nine address bits adr[10:2] used here select one of 512 words.
// This PROM is mapped at StartAdr, hard coded in the CPU module.
PROM PM (.adr(adr[10:2]), .data(romout), .clk(~clk));

//-------------- UART ------------------
RS232R receiver(.clk(clk), .rst(rst), .RxD(RxD), .fsel(bitrate), .done(doneRx),
  .data(dataRx), .rdy(rdyRx));
RS232T transmitter(.clk(clk), .rst(rst), .start(startTx), .fsel(bitrate),
  .data(dataTx), .TxD(TxD), .rdy(rdyTx));

//---------------- SPI: SD card and radio network ----------------
// Shared SPI with slow/fast speed switchover for the SD card
// Both MISO inputs are ANDed onto the single SPI core input.
SPI spi(.clk(clk), .rst(rst), .start(spiStart), .dataTx(outbus),
  .fast(spiCtrl[2]), .dataRx(spiRx), .rdy(spiRdy),
  .SCLK(SCLK[0]), .MOSI(MOSI[0]), .MISO(MISO[0] & MISO[1]));

//---------------- Video: memory mapped in RAM ----------------
VID video(.pclk(pclk), .clk(clk), .req(vidreq), .inv(swi[7]),
  .vidadr(vidadr), .viddata(inbus0), .vde(vde), .vid(vid), .hsync(hsync), .vsync(vsync));

//---------------- Keyboard and mouse ----------------
PS2 kbd(.clk(clk), .rst(rst), .done(doneKbd), .rdy(rdyKbd), .shift(),
  .data(dataKbd), .PS2C(PS2C), .PS2D(PS2D));
MouseP Ms(.clk(clk), .rst(rst), .msclk(msclk), .msdat(msdat), .out(dataMs));

//---------------- DVI / HDMI video driver ----------------
// The single-bit "vid" pixel is replicated onto all 8 bits of each color channel.
DVI dvi(.clkx1in(pclk), .clkx2in(pclkx2), .clkx10in(pllclk0), .pll_locked(pll_locked),
  .reset(~rst), .red_in({8{vid}}), .green_in({8{vid}}), .blue_in({8{vid}}),
  .hsync(hsync), .vsync(vsync), .vde(vde), .TMDS(TMDS), .TMDSB(TMDSB));

//--------------------------------------------------------------------------
//----  Hardwired Memory Maps of Boot PROM, RAM, and System Devices   ------
//--------------------------------------------------------------------------

//----- Switch between external RAM and Boot BRAM (named PROM) ---------
// Address [31:0] == [any byte, 3FF, any 14 bits] is mapped to boot PROM.
// The decoded window is therefore 2^14 bytes = 16 KB wide (address bits [13:0]).
// The PROM itself is only addressed with adr[10:2] (512 words), so its contents
// repeat inside this window.
// Switch over is triggered in firmware. It does not require any software action.
// In case we need a larger PROM, we can add more bits to PROM address.
// To move the PROM to a different address, we can change the address tag.
// Bus diagram is in PO.Computer.pdf Figure 17.2, page 14.
// "romout" == data bus from PROM; "inbus0" == data bus from external RAM.
// "codebus" == data bus to the CPU, fetching the instructions from PROM or RAM.
assign codebus = (adr[23:14] == 10'h3FF) ? romout : inbus0;  // either PROM or RAM

// ------------------------------------------------------------------------------
// Connecting the System Devices to the on chip interconnect bus: SPI, UART, etc
// References:
// [1] PO.Computer.pdf, Chapter 17 "The processor's environment", page 13.
// [2] N.Wirth "The RISC Architecture" RISC-Arch-1.pdf, page 3.
// ------------------------------------------------------------------------------
// System Devices are mapped at the highest memory addresses.
// System Devices were used in the original 2013 FPGA Oberon on Spartan-3.
// Keyboard, mouse, SPI, millisecond timer, UART, etc. The addresses jump by 4,
// because the registers are 4-byte wide, while RISC5 is addressing individual bytes.
// The number of registers (up to 16) is not sufficient for any substantial firmware.
// The GPIO #8 and #9 are a temporary fix. In a longer run we will adopt Fpro framework by Chu.

// The register addresses are shared in in/out directions. Two registers are overlaid at
// the same address, one "in" and one "out". I do not like this design practice, because
// we will write to the address and then read a different value from the same address.
// We are stuck with this because we do not want to change the software System Drivers.
//
// Be aware that the same address is expressed in *five* different ways which coexist.
// The same address is coded in full Hex, in truncated Hex, in positive decimal,
// in negative decimal, and as a running number. The "full Hex" uses all 32 bits.
// The truncated Hex uses only 24 bits routed from the CPU to the fabric.
// (We may route more bits in the future.)
// The same 24-bit address will get aliased to multiple locations in the full 32-bit space.
// Any bit combination in the highest 8 bits will yield the same 24-bit address.
// E.g., register #2 == -56 == FFFF FFC8 == 4,294,967,240 in the 32 bit address space.
// The same register #2 will be seen as FF FFC8 == 16,777,160 on the 24 bit address bus.
// The negative "-56" is the 2-s complement signed number with the same 32-bit pattern.

// Summary of bits use of the full 32 bit address.
// ----------------------------------------------
// Upper most 31:24 = 8 bits: used by software, but not routed to the FPGA fabric.
// Bits 23:6 (eighteen bits) select the System Device address space when all "1".
//           Otherwise used as regular address.
// Bits 5:2 (four bits) select one of sixteen System Device slots, one address each.
// Bits 1:0 (two bits) ignored because they address individual bytes.

// The following addresses must NOT be changed because they are used in software drivers.
// This table is based on [2], page 3. The negative decimal addresses are different
// from the ones in [1], page 13, but the hex addresses are the same. Most likely,
// the table in [1] on page 13 is wrong.
// Rows 6 and 7 are written to match the read mux and the "doneKbd" strobe in this file.
//  # | Adr | hex addr  | Input dir                   | Output dir
//  0 | -64 | FFFF FFC0 | ms timer                    | reserved
//  1 | -60 | FFFF FFC4 | switches                    | LEDs
//  2 | -56 | FFFF FFC8 | UART Rx in                  | UART Tx out
//  3 | -52 | FFFF FFCC | UART status                 | UART control
//  4 | -48 | FFFF FFD0 | SPI data in                 | SPI data out (radio network, SD card, ...)
//  5 | -44 | FFFF FFD4 | SPI status                  | SPI control
//  6 | -40 | FFFF FFD8 | mouse data + PS2 kbd ready  | --
//  7 | -36 | FFFF FFDC | PS2 kbd data                | --
//  8 | -32 | FFFF FFE0 | GPIO                        | GPIO                  // not present in [1]
//  9 | -28 | FFFF FFE4 | GPIO tri-state control                              // not present in [1]

// System Device selection
// 4 bits select one of sixteen 32-bit System Device Addresses: 0, 1, 2, 3, etc.
// Two lowest address bits are discarded because they select individual bytes.
// (In principle, we could deal with particular bytes in case of UART, for example.)
// Range = 16 addresses was decided by using only 4 bits [5:2].
// If we want more System Device Addresses, then we have to use more bits.
assign iowadr = adr[5:2];  // System Device register number 0, 1, 2, ....

// Bits [23:6] of the truncated 24-bit address will activate System Device IO Enable.
// After truncating the lowest six bits [5:0] the selector consists of eighteen "1".
// Depending on "ioenb", connect either RAM or one of the System Devices to CPU bus.
assign ioenb = (adr[23:6] == 18'h3FFFF);  // System Device memory space

// Below is n-to-1 mux circuit. Most likely a priority routing mux, Chu pages 36 and 49.
// Since this is a cascaded if-then-else circuit, it may generate a long time delay.
// Alternatively, it could be coded with a case statement, Chu System Verilog book, page 51.

//------------ CPU is reading from the following sources -------------
// inbus is input to the CPU. inbus0 comes from RAM.
// System Devices are small IP cores whose registers are muxed below, direction "in".
// Direction "out" is handled in the clocked "always" block at the end of this file.
// On RiskOne, switch register "swi" needs be read from a shift register every millisecond.
// On RiskOne, LED register Lreg needs be written to a shift register every millisecond.
assign inbus = ~ioenb ? inbus0 :  // read RAM inbus0, unless reading System Devices
   ((iowadr == 0) ? cnt1 :                    // 32-bit System Timer; 2^32*1 ms = 49.7 days
//  (iowadr == 1) ? {20'b0, btn, swi} :       // original boards; Spartan-3 or Pepino
    (iowadr == 1) ? {20'b0, 4'b0, 8'b0} :     // 4 buttons, 8 switches, not present on this board
    (iowadr == 2) ? {24'b0, dataRx} :         // 1 byte; dataTx is "out" direction, so not here
    (iowadr == 3) ? {30'b0, rdyTx, rdyRx} :   // UART status 2 bits; direction "in"
    (iowadr == 4) ? spiRx :
    (iowadr == 5) ? {31'b0, spiRdy} :
    (iowadr == 6) ? {3'b0, rdyKbd, dataMs} :  // keyboard ready flag (bit 28) + 28 bits of mouse data
    (iowadr == 7) ? {24'b0, dataKbd} :        // keyboard data byte
    (iowadr == 8) ? {24'b0, gpin} :           // GPIO; plenty on RiskOne, can be used for development
    (iowadr == 9) ? {24'b0, gpoc} : 0);

//--- Original Wirth: Byte enable signals used for Async SRAM ----------
//---                TO BE CHANGED TO ZBT RAM                 ----------
// Probably reflecting the fact that the original Spartan-3 board used two 16-bit ASRAM chips.
//assign SRce0 = ~(~ben | ~adr[1]);  // ASRAM Chip Enable   TO BE CHANGED
//assign SRce1 = ~(~ben | adr[1]);   // ASRAM Chip Enable
//assign SRbe0 = ~(~ben | ~adr[0]);  // ASRAM BYTE read
//assign SRbe1 = ~(~ben | adr[0]);   // ASRAM BYTE read
//assign SRbe = {SRbe1, SRbe0, SRbe1, SRbe0};  // original Spartan-3 board?

// NOTE(review): SRce0, SRce1, SRoe and SDled are outputs of this module that have no
// driver anywhere in this file (their assigns are commented out). What the tools put
// on those pins is tool dependent. Their correct levels depend on the board wiring,
// which is not visible here, so they are deliberately left untouched -- please tie
// them to explicit constants once the polarity is confirmed.

// SkuTek boards: individual byte enable signals for ZBT memory; only write cycles.
// The ZBT chips do not support byte masking during read.
// Byte selection during read has to be done locally by muxing single bytes onto the bus.
// See a diagram in PO.Computer.pdf, "17.1. The SRAM memory", page 14.
wire [3:0] byte_en;  // active LOW
// Byte enable only if ben is asserted by the processor. Two LSBs are addressing individual bytes.
// Here we are de-muxing two LSBs into four "byte_en" wires, active LOW.
// If writing the whole word, all four "byte_en" wires must be LOW.
// If ben is not asserted by the CPU (it means whole word access), all four wires must stay LOW.
// The "!=" operator is used because the outputs are active LOW: during a byte access a lane
// goes HIGH (disabled) when adr[1:0] points at a *different* lane, and only the addressed
// lane stays LOW (enabled).
assign byte_en = {(ben & (adr[1:0] != 2'b11)),
                  (ben & (adr[1:0] != 2'b10)),
                  (ben & (adr[1:0] != 2'b01)),
                  (ben & (adr[1:0] != 2'b00))};

// Drive the byte-enable pins. Previously "byte_en" was computed but never used and the
// SRbe output had no driver at all.
// NOTE(review): assumes SRbe[n] is wired to the byte lane carrying SRdat[8n+7:8n] -- confirm.
assign SRbe = byte_en;

// Original Wirth solution for writing to ASRAM. The on-chip devices do *not* use
// this approach. A combinational circuit is applied to the SRAM Write Enable SRwe signal.
// The clock cycle is subdivided by half based on clock level. It violates the rule
// that actions are performed on a rising clock edge.
// This solution is applied to the asynch memory chip which had no clock.
// Delaying the SRwe by half a clock cycle meets the ASRAM timing requirement,
// expressed in *nanoseconds*. This works *only* up to a certain clock frequency.
// When the frequency is too high, then 1/2 the clock cycle may be too fast for ASRAM.
// ZBT memory is *not* using this approach because it is clocked.
//assign SRwe = ~wr | clk;  // original: Write Enable LOW while wr is HIGH and clk is LOW

// Writing to RAM is prevented during reading video pixels (vidreq == HIGH).
// In the original expression above, Write Enable is asserted by the *falling* clock edge.
// This kind of solution is strongly discouraged by Chu!
// In the ZBT case, the clock *is* present. Signals should be latched on rising clock edges.
// NOTE(review): the active expression below drives SRwe LOW while wr is HIGH and clk is
// HIGH, i.e. the opposite clock phase from the commented-out original. Confirm that this
// phase change is intentional.
assign SRwe = vidreq ? 1'b1 : ~(wr & clk);  // clk == 25 MHz; this is bad circuit anyway

// ZBT Output Enable is permanently tied low on the board. No need to drive it.
//assign SRoe = wr;

//------------- Bus host switchover: CPU or video ---------------------
// Video core is stealing the CPU memory cycles, using Video Request "vidreq".
// The video core will brutally stall the processor in order to access the bus
// while it is reading the video data from the RAM.
// In original Wirth, RAM had one port shared between the video and the CPU.
// There were two hosts on this bus, CPU and video. Video was brutally stalling the CPU.
// Byte address range used here is [20:0] --> (2^21 - 1) == 1F FFFF = 2,097,151 == 2 megabytes
// Below we are multiplexing the address from the Video core or the main processor.
// Word addresses are used. Byte address bits [1:0] are handled separately,
// using individual Byte Enable pins of the RAM chips.
assign SRadr = vidreq ? {1'b0, vidadr} : adr[20:2];  // external RAM chip address

// Vedant implemented 3-state buffer inside the ZBT Controller, using behavioral Verilog
// Below it is using Xilinx Library Component instantiation
genvar i;
generate  // tri-state buffer for SRAM; the FPGA drives SRdat only while wr is HIGH
  for (i = 0; i < 32; i = i+1)
  begin: bufblock
    IOBUF SRbuf (.I(outbus[i]), .O(inbus0[i]), .IO(SRdat[i]), .T(~wr));
  end
endgenerate
//-------------- END of RAM Access Sharing Between CPU and Video ---------------

//-------------- System Devices (i.e., peripherals) ---------------
//-- Sending is combinational here. Data is latched in devices themselves.
//-- Receiving is clocked under a single "always block" at the end of this file.

// tri-state buffer for 8-bit gpio port. There is *no* such port on RiskFive.
// On RiskOne we can assign several expansion pins to such GPIOs
// Pin i is driven from gpout[i] when gpoc[i] is HIGH, otherwise it is an input.
generate
  for (i = 0; i < 8; i = i+1)
  begin: gpioblock
    IOBUF gpiobuf (.I(gpout[i]), .O(gpin[i]), .IO(gpio[i]), .T(~gpoc[i]));
  end
endgenerate

// Device #2: UART Tx sent out here. Rx is received in the clocked "always" block.
assign dataTx  = outbus[7:0];
assign startTx = wr & ioenb & (iowadr == 2);
assign doneRx  = rd & ioenb & (iowadr == 2);

// generate a 1 millisecond timer tick
assign limit = (cnt0 == 24999);  // a clock tick every 1 ms, assuming 25 MHz clock

// Device #4: SPI. Two devices: SD card and radio network (which is present on Risk...)
assign spiStart = wr & ioenb & (iowadr == 4);  // start SPI by writing to Device 4
assign SS = ~spiCtrl[1:0];                     // active low slave select
assign MOSI[1] = MOSI[0], SCLK[1] = SCLK[0], NEN = spiCtrl[3];  // NEN == Network Enable
//assign SDled = spiCtrl[0];  // LED indicator, not present (to be added in future)

// Device #7: keyboard
assign doneKbd = rd & ioenb & (iowadr == 7);

// Implement switches and LEDs for RiskOne (via shift registers)
// This has to be done with register --> shift register --> FPGA pins
// On RiskOne, switch register "swi" needs be read from a shift register every millisecond.
// On RiskOne, LED register Lreg needs be written to a shift register every millisecond.
// Both shall be done with firmware state machines rather than software.
// Both machines can be implemented in a separate "always" block for clarity.

// Receiving from System Devices all done under a single clocked section.
// All of these are registers.
always @(posedge clk)
begin
  // Reset button handling. The buttons are sampled only on a millisecond tick when the
  // low five bits of the millisecond counter are zero, i.e. once every 32 ms.
  // rst is active LOW: it goes LOW when btn[3] or button is HIGH at the sampling instant.
  // btn array is not present on Risk... This code has to be changed.
  rst <= ((cnt1[4:0] == 0) & limit) ? ~(btn[3] | button) : rst;

  // Device #1. Eight LEDs. Blank them on reset. On request, read and latch them from outbus.
  // On RiskOne, send Lreg to serial shift register every millisecond.
  // On RiskOne, receive swi from serial shift register every millisecond.
  Lreg <= ~rst ? 0 : (wr & ioenb & (iowadr == 1)) ? outbus[7:0] : Lreg;

  // Time management
  // wrap-around counter is counting system clock cycles
  cnt0 <= limit ? 0 : cnt0 + 1;  // wrap around when "limit" has hit a millisecond
  // Count millisecond ticks. Every millisecond limit == "1" for only one clock.
  cnt1 <= cnt1 + limit;          // This is the System Clock how long the System is up and running

  // Device #5: SPI control register. Zero it on reset. Otherwise latch 4 bits on a write.
  spiCtrl <= ~rst ? 0 : (wr & ioenb & (iowadr == 5)) ? outbus[3:0] : spiCtrl;

  // Device #3: UART. Zero "bitrate" on reset. On request, write one bit into this register.
  bitrate <= ~rst ? 0 : (wr & ioenb & (iowadr == 3)) ? outbus[0] : bitrate;

  // GPIO pins
  // Device #8: GPIO pins (not present on RiskFive!). On request, write 8 bits into them.
  // This register is not cleared by reset.
  gpout <= (wr & ioenb & (iowadr == 8)) ? outbus[7:0] : gpout;
  // Device #9: GPIO tri-state control (not present on RiskFive!). Zero on reset. Set them on request.
  gpoc <= ~rst ? 0 : (wr & ioenb & (iowadr == 9)) ? outbus[7:0] : gpoc;
end

// This is crazy. Why do it by hand?
// RiskOne: board clock == 25 MHz directly from a crystal oscillator
// Divide the 50 MHz PLL output by two with a toggle flip-flop.
always @ (posedge clk50) clk <= ~clk;  // clock 25 MHz

endmodule