/**********************************************
 ICE Proprietary Software - do NOT disseminate
 **********************************************/
/*
  DDR-3 Standard DRAM Controller (256Mby x 64b)

  MODE: 1=DDR/PIC5 2=DDR2/PIC6 3=DDR2/ZPPM 4=DDR2/VxM

  Pipelined command R/W queues each 16 bursts (64by) deep

  Jeff Schoen
  Innovative Computer Engineering, Inc.
  5/15/2005

  This module uses 4 DRAMs to interface the 1333Mby/s input/output streams 
  to the 2666Mby/sec DDR interface.  All transfers must be naturally aligned.
  Burst transfers are 4, 16, 32, or 64 bytes in length.

  Use 64by banks and wait DDR2?11:10 cycles between same bank activations.

  Use delayed CAS on DDR2 to allow interleaved RAS for next bank

  Need DDR2?512:256 byte cache line controller to eliminate bank waits

  The read clock window is typically 140 deg wide and V2=90,V4=0 degrees out of phase with the control clock.
  We set the starting rclk phase at the front of the possible window to ensure minimum timing between rack and renl.

  Memory control word:
  [31] nCS on 2nd cycle (config register)
  [30] FN on 1st cycle
  [29:28]=xfer 0-4by 1-16by 2-32by 3-64by
  [27:0]=ADDR [7:6]=BANK [2:0]=0

  [27:0]=ADDR [8:7]=BANK [2:0]=0
  (the two lines above disagree; this module latches a 3-bit BANK from qcmd[8:6])

  PPC addr to SDRAM addr bits:
    64by banks:  crbl CBA9 8765 4321 0|987 6543 bb21 0xxx

  Losses:
    Refresh  20/1536
    TurnAround  5/64

  Cycle Timing:

  Busy
 !B
  A
  1 RAS      - new queue   bankwrite
  2 NOP      - new iqsel   bankwait   quad
  3 NOP  !B                            cquad
  4 CAS  A
  5 NOP  1 RAS
  6 CAS  2 NOP                        quad
         3 NOP                         cquad
         4 CAS
         5 NOP  1 RAS
         6 CAS  2 NOP
                3 NOP
                4 CAS
                5 NOP  1 RAS
                6 CAS  2 NOP
                       3 NOP
                       4 CAS
  1 RAS                5 NOP
  2 NOP                6 CAS

 !B
  A
  1 RAS
  2 NOP
  3 CAS
  4 NOP
  5 NOP
  6 NOP
  7 CAS !B
  8 NOP  A
         1 RAS
         2 NOP
         3 CAS
         4 NOP
         5 NOP
         6 NOP
         7 CAS
         8 NOP
    ... 
 

  acl fcmd0
1     fcmd1 arw            openrow
      fcmd2               RAS
      fcmd3 crw
  acl fcmd4 rd wr       access
      fcmd5 rd wr1        CAS
      fcmd6 rd wr2      access
      fcmd7 rd wr3        CAS   wack
8     fcmd8 rd wr4       
 tacl fcmd9 rd wr5        WEN    
            rd wr6 
            rd wr7 
                         
                         
                         
                          rack     
                          renl

                        bankok
                        !bankwait
18                      acl
                        fcmd1

  Memory Cache Line Mapping:

    IoBus  HGhg FEfe DCdc BAba  (32by) (4cyc)

    ReRam  hgfe dcba
    FeRam  HGFE DCBA
    ChipsR d c b a
    ChipsF D C B A
    ChipsR h g f e
    ChipsF H G F E

*/
`include "mdefs.h"
`include "ramdefs.h"

// I/O buffer attribute sets (IOSTANDARD + SLEW) applied to the pad primitives in this file.
//   MOSTD   - single-ended outputs (OBUFT in qdr_o)
//   MIOSTD  - single-ended bidirectional data pins (IOBUF in qdr_iod)
//   MDIOSTD - differential bidirectional pins (IOBUFDS in qdr_od and qdr_ioc)
//   MDOSTD  - differential output set; not referenced in the visible part of this file
// With NIC7 defined the bidirectional pins use LVDCI_15 / DIFF_SSTL15; otherwise the
// *_T_DCI variants are selected.  MOSTD and MDOSTD are the same in both branches.
`ifdef NIC7
`define MOSTD #(.IOSTANDARD("SSTL15"),.SLEW("FAST"))
`define MIOSTD #(.IOSTANDARD("LVDCI_15"),.SLEW("FAST"))
`define MDIOSTD #(.IOSTANDARD("DIFF_SSTL15"),.SLEW("FAST"))
`define MDOSTD #(.IOSTANDARD("DIFF_SSTL15"),.SLEW("FAST"))
`else
`define MOSTD #(.IOSTANDARD("SSTL15"),.SLEW("FAST"))
`define MIOSTD #(.IOSTANDARD("SSTL15_T_DCI"),.SLEW("FAST"))
`define MDIOSTD #(.IOSTANDARD("DIFF_SSTL15_T_DCI"),.SLEW("FAST"))
`define MDOSTD #(.IOSTANDARD("DIFF_SSTL15"),.SLEW("FAST"))
`endif

//`define READABLE 1

// sddrXram - DRAM command/data front end driving two pin groups (m1*, m2*) that share
// chip select, reset and ODT (mxcs, mxrst, mxodt).
//
// Parameters
//   MBW   memory-side data width in bits.  NOTE(review): wpat/rpat are hard-coded to
//         128 bits and the ydata/xdata slices in the generate loop reach bit 127, so the
//         module as written assumes MBW==128.
//   IBW   I/O bus width in bits (ibus/obus).
//   MODE  not referenced inside this module.
//
// Ports
//   clks   [0]=aclk [1]=rclk [2]=wclk [4]=aclk2 [5]=rclk2 [6]=wclk2 [8]=sclk
//          [11:8]=dadj [15:12]=cadj [31:16]=rcfg [35:32]=test-mode bits (registered as ttt)
//   rst    reset request, registered into the aclk domain as arst
//   ago,acmd,afull,qrdym,qselm   command queue interface, passed straight to the scheduler
//   apd    declared but not referenced inside this module
//   ioclk,irdy,iena,ibus         read-side I/O bus (ibus is driven by the read cache RAM)
//   ordy,oena,obus               write-side I/O bus (obus feeds the write cache RAM)
//   m1*/m2*                      DRAM clock, control, address, bank, data and strobe pins
//   mxcs,mxrst,mxodt             shared chip select, reset and on-die-termination pins
//   mclk   declared but only used by commented-out timing-recovery code
//   trs    {0,0,keeper}; exists to keep the strobe input buffers from being removed
//   test   debug vector (selection is made by the uncommented assign at the end)
//
// scheduler, sdpram, delaypipe and obuftN are defined outside this file section; their
// parameter and port meanings are not visible here.
module sddrXram (
  clks,rst, ago,apd,acmd,afull,qrdym,qselm,
  ioclk, irdy,iena,ibus, ordy,oena,obus,
  m1ckp,m1ckn,m1cke,m1we,m1cas,m1ras,m1a,m1ba,m1dq,m1dqsp,m1dqsn,
  m2ckp,m2ckn,m2cke,m2we,m2cas,m2ras,m2a,m2ba,m2dq,m2dqsp,m2dqsn,
  mxcs,mxrst,mxodt, mclk,trs, test
  );

  parameter MBW=128;
  parameter IBW=64;
  parameter MODE=7;

  localparam NA=15;             // address pins per group
  localparam NBA=3;             // bank-address pins per group
  localparam NDQS=2;		// per DDR3 ram chip
  localparam NDQ=8*NDQS;	// per DDR3 ram chip

  localparam MBH=MBW/2;
  localparam IBH=IBW/2;

  localparam MBB=(MBW==64)?10:9;        // memory-side cache address width
  localparam IBB=(IBW==32)?11:10;       // I/O-side cache address width

  localparam RACKDLY=14;	// CL5=11 CL10=14 CL11+.5phase=15
  localparam WACKDLY=6;		// CWL7=6 

  input [35:0] clks;
  input rst,ago,apd;
  input [35:0] acmd;
  output afull;
  output [23:0] qrdym;
  input  [15:0] qselm;

  input  ioclk, iena,oena;
  input  irdy,ordy;
  input  [IBW-1:0] obus;
  output [IBW-1:0] ibus;

  output m1ckp,m1ckn,m1cke,m1we,m1cas,m1ras;
  output [NA-1:0] m1a; 
  output [NBA-1:0] m1ba;
  inout  [NDQ-1:0] m1dq;
  inout  [NDQS-1:0] m1dqsp,m1dqsn;

  output m2ckp,m2ckn,m2cke,m2we,m2cas,m2ras;
  output [NA-1:0] m2a;
  output [NBA-1:0] m2ba;
  inout  [NDQ-1:0] m2dq;
  inout  [NDQS-1:0] m2dqsp,m2dqsn;

  output mxcs,mxrst,mxodt;

  input mclk;
  output [2:0] trs;
  output [7:0] test;

  wire H=1, L=0;

  // unpack the clock/config bundle
  wire aclk = clks[0];
  wire rclk = clks[1];
  wire wclk = clks[2];
  wire aclk2 = clks[4];
  wire rclk2 = clks[5];
  wire wclk2 = clks[6];
  wire sclk  = clks[8];
  wire [3:0] dadj = clks[11:8];         // delay-adjust controls for the data pins (sclk is dadj[0])
  wire [3:0] cadj = clks[15:12];        // delay-adjust controls for the strobe pins
  wire [15:0] rcfg = clks[31:16];
  reg [3:0] ttt;always @(posedge aclk) ttt <= clks[35:32];

  // reset sequencing: clkrst is asserted while arst is high and its delayed copy arstd
  // is still low, then re-registered into each clock domain for the SERDES resets
  reg arst;    always @(posedge aclk) arst <= rst;
  wire arstd; delaypipe #(12,1) clkdp (aclk,arst,arstd);
  wire rstn;  delaypipe #(64) startup (aclk,H,rstn);
  reg clkrst;  always @(posedge aclk) clkrst  <= arst && !arstd;
  reg aclkrst; always @(posedge aclk) aclkrst <= clkrst;
  reg rclkrst; always @(posedge rclk) rclkrst <= clkrst;
  reg wclkrst; always @(posedge wclk) wclkrst <= clkrst;
  // {reset, fast clock, divided clock} triples consumed by the qdr_* pad cells
  wire [2:0] aclks = {aclkrst,aclk2,aclk};
  wire [2:0] rclks = {rclkrst,rclk2,rclk};
  wire [2:0] wclks = {wclkrst,wclk2,wclk};

  // test-mode bits
  wire txdt  = ttt[0];
  wire twrt  = ttt[1];          // substitute rpat for read data (via rrr)
  wire tpad  = ttt[2];          // enables the extra chip-select term access__
  wire tlbt  = ttt[3];          // substitute wpat for write data, force woenk, block access_

  genvar i;

  wire rack,wack,wen,weni,wenj;
  wire renl,renu,wenl,wenu,ienax,oenax;
  wire [MBW-1:0] rdata,wdata;
  wire [IBB-1:0] iadrx,oadrx;
  wire [MBB-1:0] rcntl,rcntu,wcntl,wcntu;

  wire acl,refresh;
  wire [35:0] qcmd;
  wire [7:0] stest;
  scheduler #(2,IBB,MBB,RACKDLY,WACKDLY) cnt (
        ioclk,arst, ago,acmd,afull, qrdym,qselm,
        ioclk,irdy,iena,ienax,iadrx,
        ioclk,ordy,oena,oenax,oadrx,
        aclk,acl,qcmd,refresh, 
        rclk,rack,renl,rcntl,renu,rcntu, 
        wclk,wack,wenl,wcntl,wenu,wcntu,
        sclk,rcfg,stest);

  // DMAR cache data array 
  sdpram #(8,MBW,IBW,0,0) rl (rclk,H, rcntl,renl,rdata,  ioclk,H, iadrx,ibus);

  // DMAW cache data array 
  sdpram #(8,IBW,MBW,0,`OREG) wl (ioclk,H, oadrx,oenax,obus,  wclk,H, wcntl,wdata);

  // test vectors
  // wtst is a free-running 3-bit counter on wclk, cleared the cycle after acl; wpat/rpat
  // expand it into 128-bit patterns of counter/complement bytes.
  reg wrst; always @(posedge wclk) wrst <= acl;
  reg  [2:0] wtst; always @(posedge wclk) if (wrst) wtst <= 0; else wtst <= wtst+1;
  wire [3:0] wtst0 = {wtst,L}, wtst1 = {wtst,H};
  wire [127:0] wpat = { {8{~wtst1}},{8{wtst1}}, {8{~wtst0}},{8{wtst0}} };
  reg rrr; always @(posedge rclk) rrr <= twrt;
  // NOTE(review): wtst lives in the wclk domain and is sampled here on rclk
  reg  [2:0] rtst; always @(posedge rclk) rtst <= wtst;
  wire [3:0] rtst0 = {rtst,L}, rtst1 = {rtst,H};
  wire [127:0] rpat = { {8{~rtst1}},{8{rtst1}}, {8{~rtst0}},{8{rtst0}} };

  // register the output data for routing purposes
  reg [MBW-1:0] vdata;
  always @(posedge wclk) vdata <= tlbt? wpat : wdata;
  wire [MBW-1:0] xdata = vdata;

  // register the input data for routing purposes
  reg [MBW-1:0] sdata;
  wire [MBW-1:0] ydata;
  always @(posedge rclk) sdata <= rrr?  rpat : ydata;
  assign rdata = sdata;

  reg rd,rd1,rd2,rd3,rd4;
  reg wr,wr1,wr2,wr3,wr4;

  // registered command/address values that feed the pad serializers
  reg [NA-1:0] fma;
  reg [NBA-1:0] fmba;
  reg fmwe,fmrst,fmcs,fmodt,fmcke,fmras,fmcas;

  reg fcmd1,fcmd2,fcmd3,fcmd4,fcmd5,fcmd6,fcmd7,fcmdx;
  reg arw,crw,acfg,dual,cdual,cquad,zqcl;
  reg [NBA-1:0] bank,cbank;
  reg [27:2] addr,caddr;

  wire quad = 0;

  // burst DMA state machine
  // Fields of qcmd are latched on acl; the c* copies are taken at fcmd2 so the column
  // phase keeps its own values while the next command is being latched.
  always @(posedge aclk) begin
    if (acl) arw   <= qcmd[35];	// read/write
    if (acl) dual  <= qcmd[33:32]==3;
    if (acl) acfg  <= qcmd[31];	// configuration command
    if (acl) addr  <= qcmd[27:2];	// start RAM address
    if (acl) bank  <= qcmd[8:6];
    if (acl) zqcl  <= qcmd[31] && qcmd[8];
    if (fcmd2) crw  <= arw;
    if (fcmd2) cbank <= bank;
    if (fcmd2) caddr <= addr;
    if (fcmd2) cdual <= dual;
    if (arst)  cquad <= 0; else if (fcmd2) cquad <= quad;
    // fmrst is held low until rstn rises, then updated to !arst whenever arst and its
    // delayed copy differ
    if (!rstn) fmrst <= L; else if (arst^arstd) fmrst <= !arst;
    if (arst) fmcke <= L; else fmcke <= (acl || fmcke);		// active H
    if (arst) fmodt <= H; else fmodt <= H;			// active H
  end


  // command state machine
  // fcmd1..fcmd7 form a one-hot shift chain started by fcmd0 (acl while fcmd1 is low).
  wire access = (fcmd4 || (cdual&&fcmd6)) && !acfg;
  wire autopre = cquad? L : cdual? fcmd6 : fcmd4;
  wire openrow  = fcmd1 && !cquad;
  wire precharge = addr[13];
  wire fcmd0 = acl && !fcmd1;
  reg openrow_,access_,access__,autopre_;
  always @(posedge aclk) begin
    fcmd1 <= fcmd0;
    fcmd2 <= fcmd1;
    fcmd3 <= fcmd2;
    fcmd4 <= fcmd3; 
    fcmd5 <= fcmd4; 
    fcmd6 <= fcmd5; 
    fcmd7 <= fcmd6; 
    fcmdx <= fcmd0|fcmd1|fcmd2|fcmd3|fcmd4|fcmd5|fcmd6|fcmd7;
`ifdef READABLE
    // NOTE(review): this branch never assigns fmcs and omits the tlbt/tpad terms that
    // the default branch below applies.
    if (openrow) begin			// activate row
      fmras <= acfg? zqcl : L;
      fmcas <= acfg? precharge|zqcl : H;
      fmwe  <= acfg? L : H;
      fmba  <= bank;
      fma   <= {L,L,addr[27:15]};
    end else if (access) begin		// column read|write
      fmras <= H;
      fmcas <= L;
      fmwe  <= crw;
      fmba  <= cbank;
      fma   <= {L,L,L,L,autopre, caddr[14:9],(caddr[5]^fcmd6),3'b000};	// on 32by cache line boundaries only
    end else if (refresh) begin		// autorefresh
      fmras <= L;
      fmcas <= L;
      fmwe  <= H;
    end else begin    			// nop
      fmras <= H;
      fmcas <= H;
      fmwe  <= H;
    end
`else
  // help the state machine encoder and use wclk to help the logic to IO route
    // the *_ strobes are registered one stage earlier (fcmd0/fcmd3/fcmd5) so that the
    // fm* outputs change in the same cycle as the READABLE form above
    openrow_ <= fcmd0 && !cquad;
    access__ <= (fcmd1&&tpad&&arw) && !acfg;
    access_  <= (fcmd3 || (cdual&&fcmd5)) && !acfg && !tlbt;
    autopre_ <= cquad? L : cdual? fcmd5 : fcmd3;
    fmras    <= openrow_? (acfg? zqcl:L) : access_? H : refresh? L : H;
    fmcas    <= openrow_? (acfg? precharge|zqcl:H) : access_? L : refresh? L : H; 
    fmwe     <= openrow_? (acfg? L:H) : access_? crw : refresh? H : H;
    fmba     <= openrow_? bank : cbank;
    fma      <= openrow_? {L,L,addr[27:15]} : {L,L,L,L,autopre_, caddr[14:9],(caddr[5]^fcmd6),3'b000};
    fmcs     <= !(openrow_ || access_ || access__ || refresh );
`endif
    rd  <= (fcmd3||fcmd5) && crw;			// always read 64 by
    rd1 <= rd; rd2 <= rd1; rd3 <= rd2; rd4 <= rd3;	// rd1 aligns with fmcas
    wr  <= (fcmd3||(cdual&fcmd5)) && !crw;
    wr1 <= wr; wr2 <= wr1; wr3 <= wr2; wr4 <= wr3; 	// wd1 aligns with fmcas
  end

//  delaypipe #(7,1) rackdp (aclk,rd1|rd2,rack); // CL5
  delaypipe #(10,1) rackdp (aclk,rd1|rd2,rack); // CL10
//  delaypipe #(11,1) rackdp (aclk,rd1|rd2,rack); // CL11 + .5 phase

  delaypipe #(1,1) wackdp  (aclk,wr1|wr2,wack);	// CWL7
  delaypipe #(4,1) wendp   (aclk,(!acfg)&(   wr1|wr2),wen);
  delaypipe #(4,1) wenidp  (aclk,(!acfg)&(   wr1|wr2),weni);
  delaypipe #(4,1) wenjdp  (aclk,(!acfg)&(wr|wr1|wr2),wenj);

  // output enables - one early last half, one late first half
  wire oenk; delaypipe #(3,1) oenkdp (aclk,   wr1|wr2|wr3|wr4,oenk); 
  reg woenk; always @(posedge wclk) woenk <= oenk || tlbt;

  // timing recovery signals
  reg mack,macken;
  always @ (posedge aclk) begin
    if (fcmd5) macken <= cdual;
    if (!macken) mack <= 0; else mack <= rack;
  end
  wire [15:0] rdqs;
  wire [1:0] rd1clk,rd2clk,m1rclk,m2rclk;
//  wire trs_re; vote3 rev (trs_re, rdqs[00],rdqs[03],rdqs[07]);
//  wire trs_fe; vote3 fev (trs_fe, rdqs[08],rdqs[11],rdqs[15]);
//  reg trs_en; always @ (posedge mclk) trs_en <= mack;
//  assign trs = {trs_fe,trs_re,trs_en};

  // chip reset
  wire mrst_ = fmrst && rstn;
  obuftN bmxrst (mxrst,L,mrst_);

  // DDR clocks (180 phase)
  wire mclk1,mclk2;
`ifdef PIC7
  qdr_od bm1ck (m1ckp,m1ckn, aclks, H,L,H,L, mclk1);
  qdr_od bm2ck (m2ckn,m2ckp, aclks, L,H,L,H, mclk2);	// pins reversed on PIC7
`else
  qdr_od bm1ck (m1ckp,m1ckn, aclks, H,L,H,L, mclk1);
  qdr_od bm2ck (m2ckp,m2ckn, aclks, H,L,H,L, mclk2);	
`endif

  // control signals
  // each command bit is repeated in all four serializer slots
  qdr_o bm1cke (m1cke, aclks, fmcke,fmcke,fmcke,fmcke);
  qdr_o bm2cke (m2cke, aclks, fmcke,fmcke,fmcke,fmcke);

  qdr_o bm1ras (m1ras, aclks, fmras,fmras,fmras,fmras);
  qdr_o bm2ras (m2ras, aclks, fmras,fmras,fmras,fmras);
  qdr_o bm1cas (m1cas, aclks, fmcas,fmcas,fmcas,fmcas);
  qdr_o bm2cas (m2cas, aclks, fmcas,fmcas,fmcas,fmcas);
  qdr_o bm1we  (m1we,  aclks, fmwe,fmwe,fmwe,fmwe);
  qdr_o bm2we  (m2we,  aclks, fmwe,fmwe,fmwe,fmwe);

  // chip select
  // fmcs is presented only in the two middle serializer slots; the outer slots are high
  qdr_o bmxcs (mxcs, aclks, H,fmcs,fmcs,H);

  // on die termination select 
  qdr_o bmxodt (mxodt, aclks, fmodt,fmodt,fmodt,fmodt);

  generate

  // address signals
  for (i=0; i<NA; i=i+1) begin:bma
    qdr_o inst1  (m1a[i], aclks, fma[i],fma[i],fma[i],fma[i]);
    qdr_o inst2  (m2a[i], aclks, fma[i],fma[i],fma[i],fma[i]);
  end

  // bank signals
  for (i=0; i<NBA; i=i+1) begin:bmba
    qdr_o inst1 (m1ba[i], aclks, fmba[i],fmba[i],fmba[i],fmba[i]);
    qdr_o inst2 (m2ba[i], aclks, fmba[i],fmba[i],fmba[i],fmba[i]);
  end

  // data strobes
  // one iteration per byte lane: i is the bit offset of the lane, j the lane index
  for (i=0; i<NDQS*8; i=i+8) begin:db
    localparam j = (i>>3);
    
    qdr_ioc ioc1 (m1dqsp[j],m1dqsn[j],cadj,txdt, rd1clk[j], aclks,rdqs[i+0],rdqs[i+1],rdqs[i+2],rdqs[i+3],
                                          oenk,oenk,oenk,oenk, aclks,H,~weni,H,~wenj);
    qdr_ioc ioc2 (m2dqsp[j],m2dqsn[j],cadj,txdt, rd2clk[j], aclks,rdqs[i+4],rdqs[i+5],rdqs[i+6],rdqs[i+7],
                                          oenk,oenk,oenk,oenk, aclks,H,~weni,H,~wenj);

    // the data capture strobe is the internal rclk2, not the received DQS
    assign m1rclk[j] = rclk2;
    assign m2rclk[j] = rclk2;

    // data buffers
    // group 1 carries bytes 0/1 and group 2 bytes 2/3 of each 32-bit quarter of ydata/xdata
    qdr_iod iod1 (m1dq[i+7:i],dadj,txdt, m1rclk[j], rclks,ydata[i+7:i+0],ydata[i+39:i+32],ydata[i+71:i+64],ydata[i+103:i+96],
                           woenk,woenk,woenk,woenk, wclks,xdata[i+7:i+0],xdata[i+39:i+32],xdata[i+71:i+64],xdata[i+103:i+96]);  
    qdr_iod iod2 (m2dq[i+7:i],dadj,txdt, m2rclk[j], rclks,ydata[i+23:i+16],ydata[i+55:i+48],ydata[i+87:i+80],ydata[i+119:i+112],
                           woenk,woenk,woenk,woenk, wclks,xdata[i+23:i+16],xdata[i+55:i+48],xdata[i+87:i+80],xdata[i+119:i+112]);  
  end

  endgenerate

  // activity toggles that give the strobe/clock feedback paths a load
  // NOTE(review): jjj[7:6] are never assigned and jjj[5:4] have no reset, so (jjj!=0)
  // is unknown in simulation until a known 1 appears.
  reg [7:0] jjj;
  wire nrd; delaypipe #(7,1) racknrd (aclk,rd1|rd2|rd3|rd4,nrd); // CL10

  always @(negedge nrd or posedge rd1clk[0]) if (!nrd) jjj[0] <= 0; else jjj[0] <= !jjj[0];
  always @(negedge nrd or posedge mclk1)     if (!nrd) jjj[1] <= 0; else jjj[1] <= !jjj[1];
  always @(negedge nrd or posedge rd2clk[0]) if (!nrd) jjj[2] <= 0; else jjj[2] <= !jjj[2];
  always @(negedge nrd or posedge mclk2)     if (!nrd) jjj[3] <= 0; else jjj[3] <= !jjj[3];

  always @(posedge rclk2) jjj[4] <= !jjj[4];
  always @(posedge aclk2) jjj[5] <= !jjj[5];

  wire keeper = rdqs[0] || rdqs[4] || rdqs[8] || rdqs[12] || (jjj!=0);

  // driven after keeper is declared so the net is not referenced before its declaration
  assign trs  = {L,L,keeper}; // to keep from removing IO buffers

  reg ccc; always @(posedge aclk) ccc <= !ccc;

//assign test = {oenk,refresh,access_,openrow_,acl,ago,rdclk[0],jjj[0]};
//assign test = {oenk,refresh,acl,ago,jjj};
//assign test = {renl,wenl,acfg,wen, dual,ccc,jjj[1:0]};
//assign test = stest;
//assign test = {fmcke,mrst_,fcmdx,ccc,ttt};
assign test = {ydata[0],rd1,xdata[0],woenk,refresh,fmrst,fcmdx,ccc};
//assign test = {rack,wack,ttt[1:0],cadj};
//assign test = {nrd,wr,jjj[5:0]};


endmodule

// Single-ended output pad cell: a 4:1 DDR OSERDESE2 feeding an OBUFT.
//   out       pad output
//   clks      [0] -> serializer CLKDIV, [1] -> serializer CLK, [2] -> serializer RST
//   in0..in3  parallel data applied to D1..D4 respectively
// All four tristate inputs are tied low; the buffer's T pin follows the serializer TQ.
module qdr_o (
  output       out,
  input  [2:0] clks,
  input        in0, in1, in2, in3
);

  wire ser_q;   // serialized data  (OQ)
  wire ser_t;   // serialized tristate control (TQ)

  OSERDESE2 #(
    .DATA_RATE_OQ("DDR"),
    .DATA_RATE_TQ("DDR"),
    .DATA_WIDTH(4)
  ) f0 (
    .OQ(ser_q), .TQ(ser_t),
    .CLKDIV(clks[0]), .CLK(clks[1]), .RST(clks[2]),
    .OCE(1'b1),
    .T1(1'b0), .T2(1'b0), .T3(1'b0), .T4(1'b0),
    .D1(in0),  .D2(in1),  .D3(in2),  .D4(in3)
  );

  OBUFT `MOSTD b0 (.I(ser_q), .T(ser_t), .O(out));

endmodule

// Differential output pad cell with read-back: a 4:1 DDR OSERDESE2 feeding an IOBUFDS.
//   outp,outn  differential pad pair
//   clks       [0] -> serializer CLKDIV, [1] -> serializer CLK, [2] -> serializer RST
//   in0..in3   parallel data applied to D1..D4 respectively
//   out        the IOBUFDS input-side output (value seen on the pad pair)
// All four tristate inputs are tied low; the buffer's T pin follows the serializer TQ.
module qdr_od (
  output       outp, outn,
  input  [2:0] clks,
  input        in0, in1, in2, in3,
  output       out
);

  wire ser_q;   // serialized data  (OQ)
  wire ser_t;   // serialized tristate control (TQ)

  OSERDESE2 #(
    .DATA_RATE_OQ("DDR"),
    .DATA_RATE_TQ("DDR"),
    .DATA_WIDTH(4)
  ) f0 (
    .OQ(ser_q), .TQ(ser_t),
    .CLKDIV(clks[0]), .CLK(clks[1]), .RST(clks[2]),
    .OCE(1'b1),
    .T1(1'b0), .T2(1'b0), .T3(1'b0), .T4(1'b0),
    .D1(in0),  .D2(in1),  .D3(in2),  .D4(in3)
  );

  IOBUFDS `MDIOSTD b0 (.I(ser_q), .T(ser_t), .IO(outp), .IOB(outn), .O(out));

endmodule

// Byte-wide bidirectional data pad cell.  Each of the 8 pins gets an IOBUF, a variable
// IDELAYE2 on the receive path, a 1:4 ISERDESE2 and a 4:1 OSERDESE2.
//   io             the 8 data pads
//   cadj           input-delay control: [0] -> IDELAY C, [1] -> LD, [2] -> CE, [3] -> INC
//   txdt           not referenced inside this module
//   rstrobe        capture clock for the deserializer (CLK, and inverted on CLKB)
//   rclks          [0] -> deserializer CLKDIV, [1] -> OCLK, [2] -> RST
//   rdata0..3      deserialized bytes, taken from Q4,Q3,Q2,Q1 respectively
//   ena0..3        output enables (active high); inverted onto the serializer T1..T4
//   wclks          [0] -> serializer CLKDIV, [1] -> CLK, [2] -> RST
//   wdata0..3      bytes applied to serializer D1..D4 respectively
module qdr_iod (
  inout  [7:0] io,
  input  [3:0] cadj,
  input        txdt,
  input        rstrobe,
  input  [2:0] rclks,
  output [7:0] rdata0, rdata1, rdata2, rdata3,
  input        ena0, ena1, ena2, ena3,
  input  [2:0] wclks,
  input  [7:0] wdata0, wdata1, wdata2, wdata3
);

  genvar b;
  generate
  for (b=0; b<8; b=b+1) begin:dq
    wire pad_rx;      // pad value from the IOBUF
    wire pad_rx_dly;  // pad value after the input delay
    wire ser_q;       // serialized write data
    wire ser_t;       // serialized tristate control

    IOBUF `MIOSTD odq (.I(ser_q), .T(ser_t), .IO(io[b]), .O(pad_rx));

    IDELAYE2 #(
      .HIGH_PERFORMANCE_MODE("TRUE"),
      .IDELAY_TYPE("VARIABLE"),
      .IDELAY_VALUE(0)
    ) dly (
      .C(cadj[0]), .LD(cadj[1]), .CE(cadj[2]), .INC(cadj[3]),
      .IDATAIN(pad_rx), .DATAOUT(pad_rx_dly)
    );

    // note Q outputs are MSB ordered
    ISERDESE2 #(
      .DATA_RATE("DDR"),
      .DATA_WIDTH(4),
      .INTERFACE_TYPE("MEMORY"),
      .IOBDELAY("IFD")
    ) brff (
      .DDLY(pad_rx_dly),
      .CLKDIV(rclks[0]), .OCLK(rclks[1]), .RST(rclks[2]),
      .CLK(rstrobe), .CLKB(~rstrobe),
      .Q4(rdata0[b]), .Q3(rdata1[b]), .Q2(rdata2[b]), .Q1(rdata3[b])
    );

    OSERDESE2 #(
      .DATA_RATE_OQ("DDR"),
      .DATA_RATE_TQ("DDR"),
      .DATA_WIDTH(4)
    ) bwff (
      .OQ(ser_q), .TQ(ser_t),
      .CLKDIV(wclks[0]), .CLK(wclks[1]), .RST(wclks[2]),
      .OCE(1'b1),
      .T1(~ena0), .T2(~ena1), .T3(~ena2), .T4(~ena3),
      .D1(wdata0[b]), .D2(wdata1[b]), .D3(wdata2[b]), .D4(wdata3[b])
    );
  end
  endgenerate

endmodule

// Differential bidirectional strobe pad cell: IOBUFDS, variable IDELAYE2 on the receive
// path, a 1:4 ISERDESE2 and a 4:1 OSERDESE2.
//   iop,ion        differential pad pair
//   cadj           input-delay control: [0] -> IDELAY C, [1] -> LD, [2] -> CE, [3] -> INC
//   txdt           not referenced inside this module
//   rdata          combinational output (O) of the deserializer
//   rclks          [0] -> deserializer CLKDIV, [1] -> OCLK, CLK and (inverted) CLKB, [2] -> RST
//   rdata0..3      deserialized samples, taken from Q4,Q3,Q2,Q1 respectively
//   ena0..3        output enables (active high); inverted onto the serializer T1..T4
//   wclks          [0] -> serializer CLKDIV, [1] -> CLK, [2] -> RST
//   wdata0..3      bits applied to serializer D1..D4 respectively
module qdr_ioc (
  inout        iop, ion,
  input  [3:0] cadj,
  input        txdt,
  output       rdata,
  input  [2:0] rclks,
  output       rdata0, rdata1, rdata2, rdata3,
  input        ena0, ena1, ena2, ena3,
  input  [2:0] wclks,
  input        wdata0, wdata1, wdata2, wdata3
);

  wire pad_rx;      // pad value from the IOBUFDS
  wire pad_rx_dly;  // pad value after the input delay
  wire ser_q;       // serialized write data
  wire ser_t;       // serialized tristate control
  wire wffd;        // only used by the disabled output-delay stage below

  IOBUFDS `MDIOSTD odq (.I(ser_q), .T(ser_t), .IO(iop), .IOB(ion), .O(pad_rx));

  IDELAYE2 #(
    .HIGH_PERFORMANCE_MODE("TRUE"),
    .IDELAY_TYPE("VARIABLE"),
    .IDELAY_VALUE(0)
  ) dlyi (
    .C(cadj[0]), .LD(cadj[1]), .CE(cadj[2]), .INC(cadj[3]),
    .IDATAIN(pad_rx), .DATAOUT(pad_rx_dly)
  );

  // note Q outputs are MSB ordered
  ISERDESE2 #(
    .DATA_RATE("DDR"),
    .DATA_WIDTH(4),
    .INTERFACE_TYPE("MEMORY"),
    .IOBDELAY("IFD")
  ) brff (
    .DDLY(pad_rx_dly),
    .CLKDIV(rclks[0]), .OCLK(rclks[1]), .RST(rclks[2]),
    .CLK(rclks[1]), .CLKB(~rclks[1]),
    .Q4(rdata0), .Q3(rdata1), .Q2(rdata2), .Q1(rdata3),
    .O(rdata)
  );

  OSERDESE2 #(
    .DATA_RATE_OQ("DDR"),
    .DATA_RATE_TQ("DDR"),
    .DATA_WIDTH(4)
  ) bwff (
    .OQ(ser_q), .TQ(ser_t),
    .CLKDIV(wclks[0]), .CLK(wclks[1]), .RST(wclks[2]),
    .OCE(1'b1),
    .T1(~ena0), .T2(~ena1), .T3(~ena2), .T4(~ena3),
    .D1(wdata0), .D2(wdata1), .D3(wdata2), .D4(wdata3)
  );

//  ODELAYE2 dlyo (.C(cadj[0]),.CE(cadj[2]),.INC(cadj[3]),.LD(cadj[1]), .ODATAIN(ser_q),.DATAOUT(wffd));
//  defparam dlyo.HIGH_PERFORMANCE_MODE = "TRUE";
//  defparam dlyo.ODELAY_TYPE = "VARIABLE";
//  defparam dlyo.ODELAY_VALUE = 0;

endmodule

