/**********************************************
 ICE Proprietary Software - do NOT disseminate
 **********************************************/
/*
  DDR Standard DRAM Controller (256Mby x 64b)

  MODE: 1=DDR/PIC5 2=DDR2/PIC6 3=DDR2/ZPPM 4=DDR2/VxM

  Pipelined command R/W queues each 16 bursts (64by) deep

  Jeff Schoen
  Innovative Computer Engineering, Inc.
  5/15/2005

  This module uses 4 DRAMs to interface the 1333Mby/s input/output streams 
  to the 2666Mby/sec DDR interface.  All transfers must be naturally aligned.
  Burst transfers are 4, 16, 32, or 64 bytes in length.

  Use 64by banks and wait DDR2?11:10 cycles between same bank activations.

  Use delayed CAS on DDR2 to allow interleaved RAS for next bank

  Need DDR2?512:256 byte cache line controller to eliminate bank waits

  the read clock window is typically 140 deg wide and V2=90,V4=0 degrees out of phase with control clock
  we set the starting rclk phase at the front of the possible window to ensure minimum timing between rack and renl

  Memory control word:
  [31] nCS on 2nd cycle (config register)
  [30] FN on 1st cycle
  [29:28]=xfer 0-4by 1-16by 2-32by 3-64by
  [27:0]=ADDR [7:6]=BANK [2:0]=0

  [27:0]=ADDR [8:7]=BANK [2:0]=0

  PPC addr to SDRAM addr bits:
    64by banks:  crbl CBA9 8765 4321 0|987 6543 bb21 0xxx

  Losses:
    Refresh  20/1536
    TurnAround  5/64

  Cycle Timing:

  Busy
 !B
  A
  1 RAS      - new queue   bankwrite
  2 NOP      - new iqsel   bankwait   quad
  3 NOP  !B                            cquad
  4 CAS  A
  5 NOP  1 RAS
  6 CAS  2 NOP                        quad
         3 NOP                         cquad
         4 CAS
         5 NOP  1 RAS
         6 CAS  2 NOP
                3 NOP
                4 CAS
                5 NOP  1 RAS
                6 CAS  2 NOP
                       3 NOP
                       4 CAS
  1 RAS                5 NOP
  2 NOP                6 CAS


          M2    M4    M5
  acl
1     fcmd1
      fcmd2
      fcmd3
  acl fcmd4 rd wr
      fcmd5 rd wr1
  acl fcmd6 rd wr2 wack autopre
      fcmd7 rd wr3      bankdone
8     fcmd8 rd wr4      1
 tacl fcmd9 rd wr5      2
            rd wr6 rdx  3
            rd wr7 rdy|rack	cause rdy is negedge of rclk
                        5
                        6
                        7
                        8
                        bankok
                        !bankwait
18                      acl
                        fcmd1

  Memory Cache Line Mapping:

    IoBus  HGhg FEfe DCdc BAba  (32by) (4cyc)

    ReRam  hgfe dcba
    FeRam  HGFE DCBA
    ChipsR d c b a
    ChipsF D C B A
    ChipsR h g f e
    ChipsF H G F E

*/
`include "mdefs.h"
`include "ramdefs.h"

// Pad electrical settings, expanded as parameter overrides on the I/O buffer
// primitives instantiated further down in this file (OBUFT / IOBUF use `OSTD).
//   SDRAM1 defined : LVCMOS25 I/O standard, outputs FAST slew with DRIVE=8
//   otherwise      : LVCMOS18 I/O standard, outputs FAST slew with DRIVE=12
// NOTE(review): `ISTD is not referenced anywhere in the visible part of this
// file - it may be used by other files or be left over; confirm before removing.
`ifdef SDRAM1
`define ISTD #(.IOSTANDARD("LVCMOS25"))
`define OSTD #(.IOSTANDARD("LVCMOS25"),.SLEW("FAST"),.DRIVE(8))
`else 
`define ISTD #(.IOSTANDARD("LVCMOS18"))
`define OSTD #(.IOSTANDARD("LVCMOS18"),.SLEW("FAST"),.DRIVE(12))
`endif

//`define READABLE 1

// sddrXram - DDR/DDR2 SDRAM controller top level.
//
// Structure (all visible in this module):
//   * `scheduler` (defined elsewhere) arbitrates commands and produces the
//     per-access command word qcmd, its strobe acl, and the refresh request.
//   * four `dpram` instances (defined elsewhere) buffer data between the
//     ioclk-side buses (ibus/obus) and the memory-side rclk/wclk domains.
//   * a shift-register style sequencer (fcmd1..fcmd9, rd..rd7, wr..wr4) turns
//     each acl strobe into RAS/CAS/WE/bank/address values and the read/write
//     acknowledge and output-enable strobes.
//   * the pad buffers sdr_oj / ddr_o / ddr_io (defined later in this file)
//     register every signal at the pin.
//
// Ports:
//   clks   - clock bundle; only bits 0 (aclk), 1 (rclk), 2 (wclk) and 6 (sclk)
//            are used here
//   rst    - passed to the scheduler; also clears cquad
//   ago,acmd,afull,qrdym,qselm - connected straight through to the scheduler
//   apd    - registered into fmcke, which drives the mcke pin
//   ioclk,irdy,iena,ordy,oena - IO-side clock and handshakes, to the scheduler
//   ibus   - IO-side output bus, driven by the rl/ru dprams
//   obus   - IO-side input bus, fed into the wl/wu dprams
//   mck*,mcke,mwe,mcas,mras,mdm,mcs,mba,ma,mdqs,mdq,modt - SDRAM pins
//   mclk   - clock used to capture the DQS inputs and to register trs_en
//   trs    - {trs_fe,trs_re,trs_en} timing recovery outputs
//   test   - driven by the scheduler
module sddrXram (
  clks,rst, ago,apd,acmd,afull,qrdym,qselm,
  ioclk, irdy,iena,ibus, ordy,oena,obus,
  mckp,mckn,mcke,mwe,mcas,mras,mdm,mcs,mba,ma,mdqs,mdq,modt,
  mclk,trs, test
  );

  // MBW = width of the memory-side data words (rdata/wdata/xdata/ydata)
  // IBW = width of the IO-side buses (ibus/obus)
  // MODE selects the memory/board variant (see the MODE line in the file header)
  parameter MBW=128;
  parameter IBW=64;
  parameter MODE=1;

  // pin counts for the mcs, mdm, ma, mba, mdqs and mdq ports
  parameter NCS=(MODE>=3)?1:2;
  parameter NDM=(MODE==3)?8:2;
  parameter NA=(MODE==3)?13:13;		// both arms are 13
  parameter NBA=(MODE>=3)?3:2;
  parameter NDQS=(MODE==3)?16:8;
  parameter NDQ=64;

  // half widths: each dpram handles one half of the IO and memory words
  parameter MBH=MBW/2;
  parameter IBH=IBW/2;

  // address widths of the memory-side (rcnt*/wcnt*) and IO-side (iadrx/oadrx) dpram ports
  parameter MBB=(MBW==64)?10:9;
  parameter IBB=(IBW==32)?11:10;

  // passed to the scheduler as its 4th and 5th parameters
  parameter RACKDLY=(MODE==1)?8:11;
  parameter WACKDLY=(MODE==1)?5:6;

  input [9:0] clks;
  input rst,ago,apd;
  input [35:0] acmd;
  output afull;
  output [23:0] qrdym;
  input  [15:0] qselm;

  input  ioclk, iena,oena;
  input  irdy,ordy;
  input  [IBW-1:0] obus;
  output [IBW-1:0] ibus;

  inout mckp;
  output mckn,mcke,mwe,mcas,mras,modt;
  output [NCS-1:0] mcs;
  output [NBA-1:0] mba;
  output [NDM-1:0] mdm;
  output [NA-1:0]  ma;
  inout [NDQS-1:0] mdqs;
  inout [NDQ-1:0]  mdq;

  input mclk;
  output [2:0] trs;
  output [7:0] test;

  // constant high / low nets used throughout for tie-offs
  wire H=1, L=0;

  // NOTE(review): L2..L32 are not referenced in the visible text of this module
  // (they could still be used by macros from the included headers - confirm)
  wire [1:0]  L2  = {2{L}};
  wire [2:0]  L3  = {3{L}};
  wire [3:0]  L4  = {4{L}};
  wire [6:0]  L7  = {7{L}};
  wire [15:0] L16 = {16{L}};
  wire [31:0] L32 = {32{L}};

  // MODE decode flags; only M2 is referenced below in this module
  wire M1 = (MODE==1)? H:L;  // PIC5
  wire M2 = (MODE>=2)? H:L;  // PIC6+
  wire M4 = (MODE>=4)? H:L;  // V5M+

  // clock taps from the clks bundle
  wire aclk = clks[0];	// command/address state machine clock
  wire rclk = clks[1];	// read data capture clock
  wire wclk = clks[2];	// write data output clock
  wire sclk = clks[6];	// passed only to the scheduler

  genvar i;

  reg rack,wack;
  wire renl,renu,wenl,wenu,ienax,oenax;
  wire [MBW-1:0] rdata,wdata;
  wire [IBB-1:0] iadrx,oadrx;
  wire [MBB-1:0] rcntl,rcntu,wcntl,wcntu;

  // scheduler is defined outside this file; connections are positional.
  // From its use here: it outputs acl/qcmd/refresh on the aclk side, takes
  // rack/wack back, and supplies the dpram addresses and enables
  // (iadrx/oadrx/ienax/oenax, rcnt*/ren*, wcnt*/wen*).
  wire acl,refresh;
  wire [35:0] qcmd;
  scheduler #(2,IBB,MBB,RACKDLY,WACKDLY) cnt (
        ioclk,rst, ago,acmd,afull, qrdym,qselm,
        ioclk,irdy,iena,ienax,iadrx, ioclk,ordy,oena,oenax,oadrx,
        aclk,acl,qcmd,refresh, rclk,rack,renl,rcntl,renu,rcntu, wclk,wack,wenl,wcntl,wenu,wcntu,
        sclk,16'h0000,test);

  // unused data outputs of the dprams (dummy nets)
  wire [MBH-1:0] rduml,rdumu;
  wire [IBH-1:0] wduml,wdumu;

  // dpram is defined outside this file; presumably each port is
  // (clk, enable, addr, write-enable, data-in, data-out) - TODO confirm.
  // The upper-half instances pass `CINV (from an included header) as the last parameter.

  // DMAR cache data array 
  // memory side writes rdata under renl/renu at rcntl/rcntu; IO side reads onto ibus at iadrx
  dpram #(4,IBH,MBH,0,0    ) rl (ioclk,H, iadrx,L,{IBH{L}},ibus[IBH-1:  0],  rclk,H,rcntl,renl, rdata[MBH-1:  0],rduml);
  dpram #(4,IBH,MBH,0,`CINV) ru (ioclk,H, iadrx,L,{IBH{L}},ibus[IBW-1:IBH],  rclk,H,rcntu,renu, rdata[MBW-1:MBH],rdumu);

  // DMAW cache data array 
  // IO side writes obus under oenax at oadrx; memory side reads wdata at wcntl/wcntu
  dpram #(4,IBH,MBH,0,0    ) wl (ioclk,H, oadrx,oenax,obus[IBH-1:  0],wduml, wclk,H,wcntl,L, {MBH{L}},wdata[MBH-1:  0]);
  dpram #(4,IBH,MBH,0,`CINV) wu (ioclk,H, oadrx,oenax,obus[IBW-1:IBH],wdumu, wclk,H,wcntu,L, {MBH{L}},wdata[MBW-1:MBH]);

  // register the output data for routing purposes
  // lower half on the rising edge of wclk, upper half on the falling edge
  reg [MBW-1:0] vdata;
  always @(posedge wclk) vdata[MBH-1:00]  <= wdata[MBH-1:00];
  always @(negedge wclk) vdata[MBW-1:MBH] <= wdata[MBW-1:MBH];
  // MODE>=2 sends the registered copy to the pads, MODE 1 the dpram output directly
  wire [MBW-1:0] xdata = M2? vdata : wdata;

  // register the input data for routing purposes
  // NOTE(review): both arms of this `ifdef are textually identical, so OLDWAY
  // currently has no effect on this section
`ifdef OLDWAY
  reg [MBW-1:0] sdata;
  wire [MBW-1:0] ydata;
  always @(posedge rclk) sdata[MBH-1:00]  <= ydata[MBH-1:00];
  always @(negedge rclk) sdata[MBW-1:MBH] <= ydata[MBW-1:MBH];
  assign rdata = sdata;
`else
  reg [MBW-1:0] sdata;
  wire [MBW-1:0] ydata;
  always @(posedge rclk) sdata[MBH-1:00]  <= ydata[MBH-1:00];
  always @(negedge rclk) sdata[MBW-1:MBH] <= ydata[MBW-1:MBH];
  assign rdata = sdata;
`endif

  // write / read pipeline taps (shifted one stage per aclk) and output enables
  reg wr,wr1,wr2,wr3,wr4,wen;
  reg rd,rd1,rd2,rd3,rd4,rd5,rd6,rd7;
  reg oen,oeni,oenj,woeni,woenj;

  // registered command/address values that feed the pad buffers
  reg [13:0] fma;
  reg [2:0] fmba;
  reg fmwe,fmcke,fmras,fmcas;

  // fcmd1..fcmd9: one-hot pulse that walks one stage per aclk after each acl
  // a*/plain = values latched at acl; c* = copies latched at fcmd2 for the CAS phase
  reg fcmd1,fcmd2,fcmd3,fcmd4,fcmd5,fcmd6,fcmd7,fcmd8,fcmd9;
  reg arw,crw,acfg,acfgnp,dual,cdual,cquad;
  reg [2:0] bank,cbank;
  reg [27:2] addr,caddr;

  // quad mode is tied off, so cquad is always loaded with 0
  wire quad = 0;

  // burst DMA state machine
  // latch the fields of qcmd when the scheduler strobes acl, then copy them to
  // the c* registers at fcmd2 so the next acl can overwrite the first set
  always @(posedge aclk) begin
    if (acl) arw   <= qcmd[35];	// read/write
    if (acl) dual  <= qcmd[33:32]==3;
    if (acl) acfg  <= qcmd[31];	// configuration command
    if (acl) addr  <= qcmd[27:2];	// start RAM address
    if (acl) bank  <= qcmd[8:6];
    if (acl) acfgnp <= qcmd[31] && !(qcmd[35] || qcmd[13]);	// configuration command - no precharge
    if (fcmd2) crw  <= arw;
    if (fcmd2) cbank <= bank;
    if (fcmd2) caddr <= addr;
    if (fcmd2) cdual <= dual;
    if (rst)   cquad <= 0; else if (fcmd2) cquad <= quad;
  end

  // command state machine
  // access    : column command slot(s) - fcmd4, plus fcmd6 for dual bursts; suppressed for config commands
  // autopre   : asserted on the last column command of the burst (never in quad mode)
  // openrow   : row command slot
  // precharge : for config commands, same condition that acfgnp negates (arw or address bit 13)
  // NOTE: access/autopre/openrow/precharge are only consumed inside the
  // `ifdef READABLE section; the default build uses the *_ registered copies below
  wire access = (fcmd4 || (cdual&&fcmd6)) && !acfg;
  wire autopre = cquad? L : cdual? fcmd6 : fcmd4;
  wire openrow  = fcmd1 && !cquad;
  wire precharge = arw || addr[13];
  always @(posedge aclk) begin
    fcmd1 <= acl && !fcmd1;		// start a new sequence on acl, but never on back-to-back cycles
    fcmd2 <= fcmd1;
    fcmd3 <= fcmd2;
    fcmd4 <= fcmd3; 
    fcmd5 <= fcmd4; 
    fcmd6 <= fcmd5; 
    fcmd7 <= fcmd6; 
    fcmd8 <= fcmd7; 
    fcmd9 <= fcmd8; 
`ifdef READABLE
    // reference (readable) form of the command encoder, all on aclk
    if (openrow) begin			// activate row
      fmras <= L;
      fmcas <= (acfg)? precharge : H;
      fmwe  <= (acfg)? L : H;
      fmba  <= bank;
      fma   <= {L,addr[27:15]};
    end else if (access) begin		// column read|write
      fmras <= H;
      fmcas <= L;
      fmwe  <= crw;
      fmba  <= cbank;
      fma   <= {L,L,L,autopre, caddr[14:8],(caddr[5]^fcmd6),2'b00};	// on 32by cache line boundaries only
    end else if (refresh) begin		// autorefresh
      fmras <= L;
      fmcas <= L;
      fmwe  <= H;
    end else begin    			// nop
      fmras <= H;
      fmcas <= H;
      fmwe  <= H;
    end
    fmcke <= apd;
`endif
    // read and write pipelines: started at fcmd3 and fcmd5, direction from crw
    rd  <= (fcmd3||fcmd5) && crw;
    rd1 <= rd; rd2 <= rd1; rd3 <= rd2; rd4 <= rd3; rd5 <= rd4; rd6 <= rd5; rd7 <= rd6;    
    wr  <= (fcmd3||fcmd5) && !crw;
    wr1 <= wr; wr2 <= wr1; wr3 <= wr2; wr4 <= wr3;
    // wen is the value driven on the DQS pads during writes; later taps for MODE>=2
    wen <= !acfg && (M2? (wr3||wr4) : (wr1||wr2));
  end

  // clock used for the final command/address register stage
  wire cclk = wclk;

`ifndef READABLE
  // lets help the state machine encoder and use wclk to help the logic to IO route
  // the *_ signals are computed one fcmd stage earlier than their READABLE
  // counterparts and registered on aclk, then the final encode is registered on cclk
  reg openrow_,access_,autopre_;
  always @(posedge aclk) begin	
    openrow_ <= acl && !fcmd1 && !cquad;
    access_  <= (fcmd3 || (cdual&&fcmd5)) && !acfg;
    autopre_ <= cquad? L : cdual? fcmd5 : fcmd3;
  end
  always @(posedge cclk) begin	
    fmcke <= apd;
    fmras <= !( openrow_ || refresh );					// low for row commands and refresh
    fmcas <= !( (openrow_ && acfgnp) || access_ || refresh );		// low for column commands, refresh, and no-precharge config
    fmwe  <= !( (openrow_ && acfg) || (access_ && !crw) );		// low for config commands and for column commands with crw=0
    fmba  <= openrow_? bank : cbank;
    fma   <= openrow_? {L,addr[27:15]} : {L,L,L,autopre_, caddr[14:8],(caddr[5]^fcmd6),2'b00};
  end
`endif

  // output enables - one early last half, one late first half
  // the wr tap window depends on MODE
  always @(negedge aclk) oen <= (MODE==3)? (wr1|wr2|wr3) : M2? (wr2|wr3|wr4) : (wr|wr1|wr2);

  // aclk-timed pair (oeni/oenj) feeds the DQS buffers
  always @(negedge aclk) oenj <= oen;
  always @(posedge aclk) oeni <= oenj;

  // wclk-timed pair (woeni/woenj) feeds the DQ buffers
  always @(negedge wclk) woenj <= oen;
  always @(posedge wclk) woeni <= woenj;

  // controller acks - one cycle early  (V2=DDR1 V4+=DDR2)
  // mack follows rdx only while macken (cdual sampled at fcmd5) is set; it feeds trs_en
  reg rdx,rdy,rdz,mack,macken;
  always @ (posedge cclk) begin
    if (fcmd5) macken <= cdual;
    if (!macken) mack <= 0; else mack <= rdx;
  end
  // wack/rdx use later pipeline taps for MODE>=2 than for MODE 1
  always @ (posedge aclk) begin
    wack <= M2? (wr1||wr2) : (wr||wr1);
    rdx  <= M2? (rd4||rd5) : (rd2||rd3);
    rdy  <= rdx;
    rdz  <= rdy;
  end
  // rack is registered on a falling edge: of cclk when VIRTEX2 is defined, else of aclk
`ifdef VIRTEX2
  always @ (negedge cclk) begin
`else
  always @ (negedge aclk) begin
`endif
    rack <= rdz;
  end

  // timing recovery signals
  // rdqs[7:0] / rdqs[15:8] are the two DQS capture outputs of each bmdqs buffer;
  // the board macro selects which lanes are sampled with mclk and how they are
  // combined. vote3 is defined elsewhere - presumably a 3-input vote; TODO confirm.
  wire [7:0] mclks;
  wire [15:0] rdqs;
`ifdef PIC5
  wire trs_re; vote3 rev (trs_re, rdqs[04],rdqs[05],rdqs[07]);
  wire trs_fe; vote3 fev (trs_fe, rdqs[12],rdqs[13],rdqs[15]);
  assign mclks = {mclk,L,mclk,mclk,L,L,L,L};
`elsif PIC6
  wire trs_re = rdqs[00] || rdqs[01];
  wire trs_fe = rdqs[08] || rdqs[09];
  assign mclks = {L,L,L,L,L,L,mclk,mclk};
`else
  wire trs_re; vote3 rev (trs_re, rdqs[00],rdqs[03],rdqs[07]);
  wire trs_fe; vote3 fev (trs_fe, rdqs[08],rdqs[11],rdqs[15]);
  assign mclks = {8{mclk}};
`endif
  reg trs_en; always @ (posedge mclk) trs_en <= mack;
  assign trs = {trs_fe,trs_re,trs_en};

  // DDR clocks (with feedback from mckp)
  // mckp outputs H then L per aclk cycle, mckn the opposite; both always enabled
  ddr_o bmckp (mckp,H,H, aclk, H,L);
  ddr_o bmckn (mckn,H,H, aclk, L,H);

  // control signals (180 phase)
  // sdr_oj registers on the inverted clock, always enabled
  sdr_oj bmcke (mcke,H, aclk, fmcke);
  sdr_oj bmras (mras,H, aclk, fmras);
  sdr_oj bmcas (mcas,H, aclk, fmcas);
  sdr_oj bmwe  (mwe, H, aclk, fmwe);

  generate

  // differential termination select (PIC6/ZPPM) (180 phase)
  // drives a registered L while oen is set and is tri-stated otherwise.
  // NOTE(review): for MODE values other than 2 and 3 nothing in this module
  // drives modt - confirm that is intended for those boards.
  if (MODE==2 || MODE==3) begin:bmodt
    sdr_oj bmodt (modt,oen, aclk, L);
  end

  // chip select (180 phase)
  // held at L on every chip select pin
  for (i=0; i<NCS; i=i+1) begin:bcs
    sdr_oj inst  (mcs[i],H, aclk, L);
  end

  // data masks (180 phase)
  // held at L on every mask pin
  for (i=0; i<NDM; i=i+1) begin:bdm
    sdr_oj inst  (mdm[i],H, aclk, L);
  end

  // address signals (180 phase)
  // only the low NA bits of the 14-bit fma register reach pins
  for (i=0; i<NA; i=i+1) begin:bma
    sdr_oj inst  (ma[i],H, aclk, fma[i]);
  end

  // bank signals (180 phase)
  // only the low NBA bits of the 3-bit fmba register reach pins
  for (i=0; i<NBA; i=i+1) begin:bmba
    sdr_oj inst (mba[i],H, aclk, fmba[i]);
  end

  // data strobes
  // first 8 strobes are bidirectional: output wen then L each aclk cycle,
  // input captured with mclks[i] into rdqs[i] and rdqs[i+8]
  for (i=0; i<8; i=i+1) begin:bmdqs
    ddr_io inst (mdqs[i],oeni,oenj, mclks[i],rdqs[i+0],rdqs[i+8], aclk, wen,L);
  end
  // strobes 8 and up (present only when NDQS>8) are output only and carry ~wen then L
  for (i=8; i<NDQS; i=i+1) begin:bmdqsn
    ddr_o inst (mdqs[i],oeni,oenj, aclk, ~wen,L);
  end

  // data buffers
  // each pin carries bit i and bit i+64 of the memory word on the two clock phases.
  // NOTE(review): the +64 offset is hard-coded; it matches MBH only for the
  // default MBW=128 - confirm before using another MBW.
  for (i=0; i<NDQ; i=i+1) begin:dq
    ddr_io inst (mdq[i], woeni,woenj, rclk,ydata[i+00],ydata[i+64], wclk,xdata[i+00],xdata[i+64]);  
  end

  endgenerate

endmodule


// single data rate clocked output buffer
module sdr_o (out,ena, clk,in);
  // out : pad output
  // ena : 1 = drive the pad, 0 = tri-state (applied unregistered)
  // clk : the data flop is clocked directly by clk
  // in  : data to register and drive
  input  ena;
  input  clk;
  input  in;
  output out;

  wire q_iob;            // registered data headed for the pad
  wire tri_off = ~ena;   // OBUFT T input is the inverse of ena

  // data register (requested to be packed into the IOB)
  FD f0 (
    .D (in),
    .C (clk),
    .Q (q_iob)
  ) /* synthesis syn_useioff=1 */;
  //synthesis attribute IOB of f0 is "TRUE"

  // tri-state output driver
  OBUFT `OSTD b0 (
    .I (q_iob),
    .T (tri_off),
    .O (out)
  );

endmodule

// single data rate clocked output buffer, registered on the inverted clock (~clk)
module sdr_oj (out,ena, clk,in);
  // Same structure as sdr_o, except the data flop is clocked by ~clk.
  // out : pad output
  // ena : 1 = drive the pad, 0 = tri-state (applied unregistered)
  // clk : clock; inverted before it reaches the flop
  // in  : data to register and drive
  input  ena;
  input  clk;
  input  in;
  output out;

  wire clk_inv = ~clk;   // flop clock is the inverse of clk
  wire tri_off = ~ena;   // OBUFT T input is the inverse of ena
  wire q_iob;            // registered data headed for the pad

  // data register (requested to be packed into the IOB)
  FD f0 (
    .D (in),
    .C (clk_inv),
    .Q (q_iob)
  ) /* synthesis syn_useioff=1 */;
  //synthesis attribute IOB of f0 is "TRUE"

  // tri-state output driver
  OBUFT `OSTD b0 (
    .I (q_iob),
    .T (tri_off),
    .O (out)
  );

endmodule

// double data rate clocked output buffer
module ddr_o (out,ena0,ena1, clk,in0,in1);
  // out       : pad output
  // ena0,ena1 : per-phase output enables (1 = drive), registered like the data
  // clk       : C0 of both DDR registers; C1 is ~clk
  // in0,in1   : data for the C0 and C1 phases respectively
  input  clk;
  input  ena0, ena1;
  input  in0, in1;
  output out;

  wire clk_inv = ~clk;    // second-phase clock
  wire tri_d0  = ~ena0;   // tri-state values are the inverse of the enables
  wire tri_d1  = ~ena1;
  wire q_data;            // DDR data to the pad
  wire q_tri;             // DDR tri-state control to the pad

  // data DDR register: clock enable tied high, reset and set tied low
  FDDRRSE f0 (
    .Q  (q_data),
    .C0 (clk),
    .C1 (clk_inv),
    .CE (1'b1),
    .D0 (in0),
    .D1 (in1),
    .R  (1'b0),
    .S  (1'b0)
  ) /* synthesis syn_useioff=1 */;

  // tri-state DDR register, wired the same way
  FDDRRSE t0 (
    .Q  (q_tri),
    .C0 (clk),
    .C1 (clk_inv),
    .CE (1'b1),
    .D0 (tri_d0),
    .D1 (tri_d1),
    .R  (1'b0),
    .S  (1'b0)
  ) /* synthesis syn_useioff=1 */;

  // tri-state output driver
  OBUFT `OSTD b0 (
    .I (q_data),
    .T (q_tri),
    .O (out)
  );
  //synthesis attribute IOB of f0 is "TRUE"
  //synthesis attribute IOB of t0 is "TRUE"

endmodule

// double data rate clocked input/output buffer
module ddr_io (io,ena0,ena1, rclk,rdata0,rdata1, wclk,wdata0,wdata1);
  // io            : bidirectional pad
  // ena0,ena1     : per-phase output enables (1 = drive), registered on wclk
  // rclk          : input capture clock
  // rdata0,rdata1 : captured input for the two rclk phases
  // wclk          : output clock
  // wdata0,wdata1 : output data for the two wclk phases
  inout  io;
  input  ena0, ena1;
  input  rclk, wclk;
  input  wdata0, wdata1;
  output rdata0, rdata1;

  wire pad_in;            // value seen on the pad
  wire pad_out;           // DDR data driven to the pad
  wire pad_tri;           // DDR tri-state control
  wire tri_d0 = ~ena0;    // tri-state values are the inverse of the enables
  wire tri_d1 = ~ena1;

  IOBUF `OSTD odq (
    .IO (io),
    .I  (pad_out),
    .T  (pad_tri),
    .O  (pad_in)
  );

`ifdef VIRTEX2
  // build the DDR registers from FDDRRSE output flops plus two input flops
  // clocked by rclk and ~rclk
  wire wclk_inv = ~wclk;
  wire rclk_inv = ~rclk;

  FDDRRSE btff (
    .Q  (pad_tri),
    .C0 (wclk),
    .C1 (wclk_inv),
    .CE (1'b1),
    .D0 (tri_d0),
    .D1 (tri_d1),
    .R  (1'b0),
    .S  (1'b0)
  ) /* synthesis syn_useioff=1 */;
  //synthesis attribute IOB of btff is "TRUE"

  FDDRRSE bwff (
    .Q  (pad_out),
    .C0 (wclk),
    .C1 (wclk_inv),
    .CE (1'b1),
    .D0 (wdata0),
    .D1 (wdata1),
    .R  (1'b0),
    .S  (1'b0)
  ) /* synthesis syn_useioff=1 */;
  //synthesis attribute IOB of bwff is "TRUE"

  FD brff0 (
    .D (pad_in),
    .C (rclk),
    .Q (rdata0)
  ) /* synthesis syn_useioff=1 */;
  FD brff1 (
    .D (pad_in),
    .C (rclk_inv),
    .Q (rdata1)
  ) /* synthesis syn_useioff=1 */;
  //synthesis attribute IOB of brff0 is "TRUE"
  //synthesis attribute IOB of brff1 is "TRUE"
`else
  // use the dedicated ODDR / IDDR primitives with their default parameters
  ODDR btff (
    .Q  (pad_tri),
    .C  (wclk),
    .CE (1'b1),
    .D1 (tri_d0),
    .D2 (tri_d1)
  );
  ODDR bwff (
    .Q  (pad_out),
    .C  (wclk),
    .CE (1'b1),
    .D1 (wdata0),
    .D2 (wdata1)
  );
  IDDR brff (
    .D  (pad_in),
    .C  (rclk),
    .CE (1'b1),
    .Q1 (rdata0),
    .Q2 (rdata1)
  );
`endif

endmodule

