/**********************************************
 ICE Proprietary Software - do NOT disseminate
 **********************************************/
/*
  DDR-4 Standard DRAM Controller (256Mby x 64b)

  MODE: 1=DDR/PIC5 2=DDR2/PIC6 3=DDR2/ZPPM 4=DDR2/VxM

  Pipelined command R/W queues each 16 bursts (64by) deep

  Jeff Schoen
  Innovative Computer Engineering, Inc.
  5/15/2005

  This module uses 4 DRAMs to interface the 1333Mby/s input/output streams 
  to the 2666Mby/sec DDR interface.  All transfers must be naturally aligned.
  Burst transfers are 4, 16, 32, or 64 bytes in length.

  Use 64by banks and wait 11 cycles between same bank activations.

  Use delayed CAS to allow interleaved RAS for next bank

  Need 512 byte cache line controller to eliminate bank waits

  Memory control word:
  [31] nCS on 2nd cycle (config register)
  [30] FN on 1st cycle
  [29:28]=xfer 0-4by 1-16by 2-32by 3-64by
  [27:0]=ADDR [7:6]=BANK [2:0]=0

  [27:0]=ADDR [8:7]=BANK [2:0]=0

  PPC addr to SDRAM addr bits:
    64by banks:  crbl CBA9 8765 4321 0|987 6543 bb21 0xxx

  Losses:
    Refresh  20/1536
    TurnAround  5/64

  Cycle Timing:
    CAS can only be in slots 0 or 2
    Put RAS in slot 0 and CAS in 2
    Put refresh in slot 3 to avoid CAS/RAS slot conflict

  Busy
 !B
  Acl		Refresh
  1 RAS NOP NOP NOP
  2 NOP NOP NOP NOP
  3 NOP NOP NOP NOP
  4 NOP NOP CAS NOP
  1 ras NOP CAS NOP
  2 NOP NOP CAS NOP
  3 NOP NOP CAS NOP
  4 NOP NOP cas NOP
  1
  x NOP NOP NOP RFR

  Memory Cache Line Mapping:

*/
`include "mdefs.h"
`include "ramdefs.h"

// Parameter-override shorthands (#(...) lists) bundling IOSTANDARD / SLEW and,
// for the last two, IBUF_LOW_PWR settings.  Single-ended variants use
// SSTL12_DCI / POD12_DCI, the "D" variants use DIFF_SSTL12 / DIFF_POD12_DCI.
// NOTE(review): presumably meant for I/O buffer primitive instances (O = output,
// IO = bidirectional); none of these macros is referenced in the visible part
// of this file - confirm whether an included header or another file uses them.
`define MOSTD #(.IOSTANDARD("SSTL12_DCI"),.SLEW("FAST"))
`define MDOSTD #(.IOSTANDARD("DIFF_SSTL12"),.SLEW("FAST"))
`define MIOSTD #(.IOSTANDARD("POD12_DCI"),.SLEW("FAST"),.IBUF_LOW_PWR("FALSE"))
`define MDIOSTD #(.IOSTANDARD("DIFF_POD12_DCI"),.SLEW("FAST"),.IBUF_LOW_PWR("FALSE"))

/*
  sddrXram - DRAM controller wrapper.

  What the code in this module does:
   - derives resets and a few debug/test bits from the clks[] vector
   - instantiates `scheduler` (cnt), which hands out commands (acl/qcmd),
     refresh requests and the read/write cache counters
   - buffers data between the ioclk-side crossbar buses (ibus/obus, IBW bits)
     and the MBW-bit memory side through two `sdpram` instances (rl, wl)
   - runs a command shift chain fcmd0..fcmd4 per accepted command and builds
     the per-slot ACT/RAS/CAS/WE/address/bank buses from it
   - unless PIC8B or PIC8P is defined, instantiates `mig_0` and connects the
     DRAM pins and the per-slot command buses to it

  Parameters:
    IBW   crossbar data bus width in bits (128 selects CHOP=1 / NKR=16)
    MODE  not referenced anywhere in this module
    MBL   number of DRAM byte lanes; the fixed 512-bit wrData/rdData buses
          equal NDQ*8 only for MBL=8

  NOTE(review): scheduler, sdpram, delaypipe and mig_0 are defined outside this
  file and connected by position; their port semantics are assumed from the
  signal names used here - confirm against their definitions before changing
  any connection order.
*/
module sddrXram (
  clks,rst, ago,apd,acmd,afull,qrdym,qselm,
  ioclk, irdy,iena,ibus, ordy,oena,obus,
  mckp,mckn,mcke,mwe,mcas,mras,ma,mba,mdm,mdq,mdqsp,mdqsn,mcs,mrst,modt,mact,
  mclk,trs, test);

  parameter IBW=128;
  parameter MODE=8;
  parameter MBL=8;

  localparam NBA=3;
  localparam NA=14;
  localparam NDQS=MBL;		// DDR4 ram byte lanes 
  localparam NDQ=8*NDQS;	// DDR4 ram bit lanes

  localparam MBB=9;		// for 32K RAMs
  localparam IBB=10;		// for 32K RAMs
  localparam MBW=512;

  localparam CHOP=(IBW==128)?1:0;
  localparam NKR=(IBW==128)?16:32;

//  localparam RACKDLY=CHOP?13:12;	// CL5=11 CL10=14 CL11+.5phase=15
  localparam RACKDLY=14;	// CL5=11 CL10=14 CL11+.5phase=15
//  localparam WACKDLY=5;		// CWL9=7 CWL11=8
  localparam WACKDLY=5;		// CWL9=7 CWL11=8

  // clks[0]=sysclkp [1]=sysclkn [2]=rmems [3]=sclk [31:16]=scheduler config [35:32]=debug bits
  input [40:0] clks;		// all memory clocks - see below

  // command group sync with ioclk
  input rst,ago,apd;		// apd is not referenced in this module
  input [35:0] acmd;		// command address and modifiers
  output afull;			// cache output buffer almost full
  output [23:0] qrdym;
  input  [15:0] qselm;		// {RRi[7:1],qi_sel[2:0],ro_sel[2:0],ri_sel[2:0]}

  // crossbar bus sync with ioclk
  input  ioclk, iena,oena;
  input  irdy,ordy;
  input  [IBW-1:0] obus;
  output [IBW-1:0] ibus;

  // actual DRAM signals
  output mckp,mckn,mcke,mwe,mcas,mras;
  output [NA-1:0] ma; 
  output [NBA-1:0] mba;
  inout  [NDQS-1:0] mdm;
  inout  [NDQ-1:0] mdq;
  inout  [NDQS-1:0] mdqsp,mdqsn;
  output mcs,mrst,modt,mact;

  // possible feedback signals - not used
  input mclk;			
  output [2:0] trs;
  output [7:0] test;

  // constant high / low used throughout
  wire H=1, L=0;

  // clocks
  // usrclk/usrrst/dbgclk are driven by mig_0 (or usrclk=ioclk in the PIC8B/PIC8P builds);
  // the address, read and write domains all run on usrclk
  wire usrclk,usrrst,dbgclk;	
  wire aclk = usrclk;
  wire rclk = usrclk;
  wire wclk = usrclk;
  wire sysclkp = clks[0];
  wire sysclkn = clks[1];
  wire rmems = clks[2];		// forces arst/brst and blocks refresh while high
  wire sclk  = clks[3];

  // scheduler configuration - falls back to 16'h8FFF when clks[31:16] is all zero
  wire [15:0] srcfg = clks[31:16];
  wire [15:0] rcfg = (srcfg!=0)? srcfg : 16'h8FFF;

  // resets
  // NOTE(review): delaypipe is defined elsewhere; presumably rstn goes high 60 aclk cycles after start - confirm
  wire rstn;  delaypipe #(60) startup (aclk,H,rstn);
  reg arst; always @(posedge aclk) arst <= !rstn || rmems;		// controller reset (also fed to mig_0 as srst)
  reg brst; always @(posedge aclk) brst <= !rstn || rmems || rst;	// scheduler reset, additionally includes rst
  wire srst = arst;
  wire calib_done;
 
  // debug signals - registered copy of clks[35:32]
  // t3 blocks gt_data_ready, t4 blocks acfg, t1/t2 only feed the unreferenced `chop` wire
  reg [3:0] ttt; always @(posedge aclk) ttt <= clks[35:32];
  wire t1=ttt[0],t2=ttt[1],t3=ttt[2],t4=ttt[3];

  genvar i,j;

  // wen/weni/wenj are declared but not referenced in this module
  wire rack,wack,wen,weni,wenj;
  wire renl,renu,wenl,wenu,ienax,oenax;
  wire [MBW-1:0] rdata,wdata;
  wire [IBB-1:0] iadrx,oadrx;
  wire [MBB-1:0] rcntl,rcntu,wcntl,wcntu;

  wire acl,refresh;
  wire [35:0] qcmd;
  wire [31:0] tdata;		// only appears in the commented-out scheduler port
  wire [7:0] stest;
  wire calcy = qcmd[0];		// calibration cycle request - takes effect only when acl is low
  wire calup = qcmd[1];		// delayed below to form gt_data_ready
  wire calupd,renx;
  reg calrw;
  
  scheduler #(2,IBB,MBB,RACKDLY,WACKDLY) cnt (
        ioclk,brst, ago,acmd,afull, qrdym,qselm,
        ioclk,irdy,iena,ienax,iadrx,
	ioclk,ordy,oena,oenax,oadrx,
        aclk,acl,qcmd,refresh, 
	rclk,rack,renl,rcntl,renu,rcntu, 
	wclk,wack,wenl,wcntl,wenu,wcntu,
        sclk,rcfg,stest); //,tdata);

  // DMAR cache data array 
  sdpram #(NKR,MBW,IBW,0,0) rl (rclk,H, rcntl[MBB-1:CHOP],renx,rdata,  ioclk,H, iadrx,ibus);

  // DMAW cache data array 
  sdpram #(NKR,IBW,MBW,0,0) wl (ioclk,H, oadrx,oenax,obus,  wclk,H, wcntl[MBB-1:CHOP],wdata);

  // test vectors
  // wpat is loaded with sixteen 32-bit words 0x00000000..0xFFFFFFFF while wrst is high and
  // then rotates right by one 32-bit word every wclk.  It is not referenced elsewhere in this module.
  reg wrst; always @(posedge wclk) wrst <= srst;
  wire [255:0] wtstb = 256'hFFFFFFFFEEEEEEEEDDDDDDDDCCCCCCCCBBBBBBBBAAAAAAAA9999999988888888; 
  wire [255:0] wtsta = 256'h7777777766666666555555554444444433333333222222221111111100000000;
  // explicit 512-bit rotate; the previous {wpat,wpat[511:32]} form produced the same value
  // only through implicit truncation of a 992-bit concatenation
  reg [511:0] wpat; always @(posedge wclk) if (wrst) wpat <= {wtstb,wtsta}; else wpat <= {wpat[31:0],wpat[511:32]};

  // NOTE(review): both arms select renl, so CHOP has no effect here - confirm this is intended
  assign renx = CHOP? renl : renl;

  wire cfg_ = L; // qcmd[31];	// tied low: the configuration path (acfg, qcmd[8:6] bank select) is never taken
  reg fmcs,fmodt,fmdm,fmcke;
  reg fcmd0,fcmd1,fcmd2,fcmd3,fcmd4,fcmdx;
  reg arw,acfg,acal,dual,crw,ccal,cdual;
  reg [NBA-1:0] bank,cbank;
  reg [31:2] addr,caddr;

  // want to alter bank groups every other slot for RRD_S vs RRD_L
  wire [NBA-1:0] qbnk = cfg_? qcmd[8:6] : CHOP? {qcmd[7],qcmd[9:8]} : {qcmd[8],qcmd[10:9]};

  // burst DMA state machine
  // a* registers capture the command on acl (or the calibration defaults on calcy);
  // c* registers are copies taken at fcmd0 (cbank at fcmd2) for use by the CAS phase
  always @(posedge aclk) begin
    if (acl) arw   <= qcmd[35];		else if (calcy) arw  <= calrw;		// read/write (1 = read, see casrw/mcRdCAS)
    if (acl) dual  <= qcmd[33:32]==2;	else if (calcy) dual <= H;		// 128by length
    if (acl) addr  <= qcmd[31:2];	else if (calcy) addr <= 0;		// start RAM address
    if (acl) bank  <= qbnk[2:0];	else if (calcy) bank <= 0;		// start RAM bank 
    if (acl) acal  <= L;		else if (calcy) acal <= H;		// marks a calibration cycle
    if (arst) fmdm <= H; else fmdm <= L;					// active H - never masked (not referenced elsewhere here)
    if (arst) fmcs <= H; else fmcs <= L;					// active L
    if (arst) fmcke<= L; else fmcke <= H;					// L during arst, H afterwards (not referenced elsewhere; mc_CKE is tied high)
    if (fcmd0) crw <= arw;
    if (fcmd0) ccal <= acal;
    if (fcmd0) cdual <= dual;
    if (fcmd0) caddr <= addr;
    if (fcmd2) cbank <= bank;	// only used with cas - one cycle behind c
  end

  // command state machine
  wire ras = fcmd0;			// RAS
  reg  cas,casrw,rd,wr,rfr,calok,cas_,casrw_;
  wire bchop = CHOP? H:H; 		// if legal L:H
  wire autopre = cdual? fcmd4:fcmd2;	// set on the last CAS phase of the command (fcmd4 for dual, fcmd2 otherwise)
  wire [NA-1:0] rasad = addr[29:16];	// RAS address
  reg  [NA-1:0] casad,casad_;
  wire fcmd12 = fcmd1 || fcmd2;
  wire fcmd13 = fcmd1 || fcmd3;		// not referenced in this module
  wire fcmd34 = fcmd3 || fcmd4;
  wire fcmd24 = fcmd2 || fcmd4;
  wire chop = CHOP && !( (t1 && crw) || (t2 && !crw) );	// not referenced in this module
  always @(posedge aclk) begin
    calok <= calib_done;
    acfg  <= calok &&  (acl &&  cfg_ && !t4);		// configuration command
    fcmd0 <= calok && ((acl && !cfg_) || calcy);	// normal or calibration command
    // shift chain: fcmdN is high N cycles after fcmd0
    fcmd1 <= fcmd0;
    fcmd2 <= fcmd1;
    fcmd3 <= fcmd2;
    fcmd4 <= fcmd3; 
    fcmdx <= fcmd1|fcmd2|fcmd3|fcmd4;			// not referenced elsewhere in this module
    // CAS strobe: every other phase when CHOP, every phase otherwise; phases 3/4 only for dual commands
    cas_  <= CHOP?  (fcmd1  || (cdual&&fcmd3))
                 :  (fcmd12 || (cdual&&fcmd34));
    casrw_ <= crw && (fcmd12 || (cdual&&fcmd34));
    // CAS address: bit 12 = bchop, bit 10 = autopre, bits 13 and 11 low,
    // low column bits stepped by XOR with the phase signals
    casad_ <= CHOP? {L,bchop,L,autopre, caddr[15:10],(caddr[6]^(fcmd34)),1'b0  ,2'b00}
                 : {L,bchop,L,autopre, caddr[15:11],(caddr[7]^(fcmd34)),caddr[6]^fcmd24,3'b000};
    // second register stage on the CAS strobe, direction and address
    cas   <= cas_;
    casrw <= casrw_;
    casad <= casad_;
    rd    <=  crw && (fcmd12|fcmd34) && !ccal;	// always read full slot but not cal cycle read
    wr    <= !crw && (fcmd12|fcmd34) && !ccal;
    rfr   <= refresh && calok && !rmems;
    if (arst) fmodt <= 0;   else if (fcmd1) fmodt <= !crw;	// ODT follows the direction of the latest command: on for writes
    if (!calok) calrw <= L; else if (calcy) calrw <= H;	// first calibration cycle after calok uses arw=L, later ones arw=H
  end

  // NOTE(review): delaypipe presumably delays its input by the given number of aclk cycles - confirm
  delaypipe #(RACKDLY-1) rackdp (aclk,rd,rack);
  delaypipe #(WACKDLY-2) wackdp (aclk,wr,wack);	
  delaypipe #(RACKDLY+9) caludp (aclk,calup,calupd);	// update read calibration in middle of next write cycle since no back-to-back read cycles

  wire [63:0] wrDataMask = {64{L}};
  wire [511:0] wrData;
  wire [511:0] rdData;
  // transpose between the cache layout (time slot major: j*NDQ+i) and the
  // mig_0 data layout (data bit major: i*8+j)
  for (i=0; i<NDQ; i=i+1) begin:db	// data bit
   for (j=0; j<8; j=j+1) begin:ts	// time slot
    assign wrData[i*8+j] = wdata[j*NDQ+i];
    assign rdata[j*NDQ+i] = rdData[i*8+j];
   end
  end
  wire [511:0] dbg_bus;

  // per-slot command strobes, two bits per slot, most-significant pair first:
  // bits[7:6] carry refresh, bits[5:4] CAS, bits[3:2] the config command, bits[1:0] RAS
  // (matches the slot 3 / slot 2 / slot 0 assignment described in the file header)
  wire [7:0] mcAct = { {2{H}},    {2{H}},           {2{H}},     {2{ras?L:H}} };
  wire [7:0] mcRas = { {2{!rfr}}, {2{H}},           {2{!acfg}}, {2{ras?L:H}} };
  wire [7:0] mcCas = { {2{!rfr}}, {2{cas?L:H}},     {2{!acfg}}, {2{ras?addr[31]:H}} };
  wire [7:0] mcWe  = { {2{H}},    {2{cas?casrw:H}}, {2{!acfg}}, {2{ras?addr[30]:H}} };
  // per address bit: upper nibble = CAS address, lower nibble = RAS address
  wire [NA*8-1:0] mcAdr;
  for (i=0; i<NA; i=i+1) begin:bma
    assign mcAdr[7+i*8:i*8] = { {4{casad[i]}},{4{rasad[i]}} };
  end
  wire [4:0] dBufAdr=0;
  wire [4:0] rdDataAddr;
  wire [0:0] rdDataEn;
  wire [0:0] rdDataEnd;
  wire [0:0] per_rd_done;
  wire [0:0] rmw_rd_done;
  wire [4:0] wrDataAddr;
  wire [0:0] wrDataEn;

  wire [7:0] mc_ACT_n = mcAct;
  wire [135:0] mc_ADR = {mcRas,mcCas,mcWe,mcAdr};	// 3*8 + NA*8 = 136 bits
  // bank bits: upper nibble from cbank (CAS phase), lower nibble from bank (RAS phase)
  wire [15:0] mc_BA = { {4{cbank[1]}},{4{bank[1]}}, {4{cbank[0]}},{4{bank[0]}} };
  wire [7:0] mc_BG  = { {4{cbank[2]}},{4{bank[2]}} };
  wire [7:0] mc_CKE  = {8{H}};		// always high
  wire [7:0] mc_CS_n = {8{fmcs}};
  wire [7:0] mc_ODT  = {8{fmodt}};

  wire [1:0] mcCasSlot = {cas,L};	// CAS slot always slot 2
  wire [0:0] mcCasSlot2 = cas;
  wire [0:0] mcRdCAS = (cas&&casrw);
  wire [0:0] mcWrCAS = (cas&&!casrw);
  wire [0:0] winInjTxn = 0;
  wire [0:0] winRmw = 0;
  wire gt_data_ready = calupd&&!t3;
  wire [4:0] winBuf = 0;
  wire [1:0] winRank = 0;
  wire [5:0] tCWL;

  // PIC8B / PIC8P builds contain no mig_0: calibration is reported done and usrclk is ioclk.
  // NOTE(review): in those builds usrrst, rdData, rdDataEn, wrDataEn and the DRAM pins have
  // no driver inside this module - confirm that is acceptable for those targets.
`ifdef PIC8B

  assign calib_done = H;
  assign usrclk = ioclk;

`elsif PIC8P

  assign calib_done = H;
  assign usrclk = ioclk;

`else

  // positional connections - order must match the mig_0 port list exactly
  mig_0 mig (
    srst,sysclkp,sysclkn,		// DDR clocks,resets
    usrclk,usrrst,dbgclk,		// user clocks,resets - memory debug goes after here
    mact,{mras,mcas,mwe,ma},mba[1:0],mba[2],mcke,modt,mcs,mckp,mckn,mrst,mdm,mdq,mdqsn,mdqsp,	// external memory pins
    calib_done, dBufAdr,wrData,wrDataMask,rdData,rdDataAddr,rdDataEn,rdDataEnd,per_rd_done,rmw_rd_done,wrDataAddr,wrDataEn,
    mc_ACT_n,mc_ADR,mc_BA,mc_BG,mc_CKE,mc_CS_n,mc_ODT,mcCasSlot,mcCasSlot2,mcRdCAS,mcWrCAS,
    winInjTxn,winRmw,gt_data_ready,winBuf,winRank,tCWL,dbg_bus);

`endif

  // free-running toggle exported on test[0]; it has no reset, so it is given an
  // initial value - without one it stays X for the whole simulation
  reg ff = 1'b0; always @(posedge aclk) ff <= !ff;
  assign test = {wrDataEn,rdDataEn,wack,rack,calok,usrrst,rmems,ff};
//  assign test = {calok,usrrst,rmems,ff,wrDataEn,rdDataEn,ras,renl};
//  assign test = {ff,refresh, wrDataEn,rdDataEn,wenl,renl};
//  assign test = {calok,calcy,ff,refresh, wrDataEn,rdDataEn,wenl,renl};
//  assign test = stest;
  // trs is 3 bits wide; the 2-bit value is zero-extended, so trs[2] is always 0
  assign trs = {arst,calok};

endmodule
