/**********************************************
 ICE Proprietary Software - do NOT disseminate
 **********************************************/
/*
  Various RAMs / FIFOs

  Fifo write valid on iena
  Fifo read valid one cycle after oena

  Parameter DP is the number of 2Kby DualPort Block RAMS to use.
  If DP=0, it will use 1 or 2 as necessary to cover the larger of the two data bus widths (32b per RAM Interface).

  Parameter 2**BM is number of buffer splits, the ready flags guarantee 2 buffer splits available for read or write
  Thus a 4Kby fifo with BM=3 will always have 4K/8 * 2 or 1K of data available when flags are good

  Flags:
  CINV  Clock Invert
  CENA  Clock Enable
  OREG  Registered Output Buffer
  TIGHT Tight buffer coupling
  PACKET One 64|128|256 byte packet buffering if BM=0

  Jeff Schoen
  Innovative Computer Engineering, Inc.
  1/15/2005

*/

`include "mdefs.h"
`include "../lib/ramdefs.h"

// Block RAM architecture specifics
// The NO16BRAM / NO32BRAM macros defined here feed the RAMKB selection further
// down; RAMBW is chosen per vendor family.
`ifdef verilator
`define USERAMBXX
//`define DEBUG
// uncomment this to emulate Altera
//`define NO16BRAM
// uncomment this to emulate Virtex4-
//`define NO32BRAM

// VIRTEX2 is only examined when verilator is not defined
`elsif VIRTEX2
`define NO32BRAM
`define NORAMREG
`endif

`ifdef VIRTEX4
`define NO32BRAM
`endif

`ifdef VIRTEX5
`define NO32BRAM
`endif

`ifdef SPARTAN6
`define NO32BRAM
`endif

`ifdef ARTIX7
`define NO32BRAM
`endif

// RAMBW: 32 for AGILEX, 16 for other ALTERA builds, 32 otherwise.
// Both Altera branches also define NO32BRAM.
`ifdef AGILEX
`define RAMBW 32
`define NO32BRAM
`elsif ALTERA
`define RAMBW 16
`define NO32BRAM
`else
`define RAMBW 32
`endif

`ifdef STRATIX4
`define NO16BRAM
`endif

// Block RAM default kbytes and addr bits
// NO16BRAM takes priority over NO32BRAM: 1 KB, else 2 KB, else 4 KB.
`ifdef NO16BRAM
`define RAMKB 1
`elsif NO32BRAM
`define RAMKB 2
`else
`define RAMKB 4
`endif

// DMA packet length multiplier (1, 2 or 4) and the matching LPKTS value
// (9, 10 or 11). LPKTS equals log2 of a 64/128/256-byte packet expressed in
// bits, which is how it is subtracted from LRS in the FIFO modules below.
`ifdef DMA_PKT_4X
`define DMA_PKT_LEN 4
`define LPKTS 11
`elsif DMA_PKT_2X
`define DMA_PKT_LEN 2
`define LPKTS 10
`else
`define DMA_PKT_LEN 1
`define LPKTS 9
`endif

// Default for SYS_CLKF_ when the build does not supply one
// (presumably the system clock frequency in MHz -- TODO confirm).
`ifndef SYS_CLKF_
`define SYS_CLKF_ 100
`endif

// Empty module: no ports, no contents.
// NOTE(review): presumably present so the file has a module matching its name -- confirm.
module rams;
endmodule

// Stream-in / array-out buffer.
// Wraps aiaoNxM with the input side forced into stream mode (NFLG|`BSTREAM):
// the input "next" strobe follows ival, the input address step is the input
// word size in bytes (NB), the input length is the constant ILEN and the
// input DMA word is zero. The output side is passed straight through.
module fiaoNxM (iclk,irst, irdy,ival,idat,
                oclk,orst, ordy,oena,odma,onxt,oadr,olen,odat);
  parameter ND=8;       // input data width in bits
  parameter MD=8;       // output data width in bits
  parameter NR=0;       // passed to aiaoNxM NR
  parameter BM=3;       // passed to aiaoNxM BX
  parameter NFLG=0;     // input side flags
  parameter MFLG=0;     // output side flags

  localparam ILEN = 256*3;
  // input word size in bytes, rounded down to a power of two (max 32)
  localparam NB = (ND>=256)? 32 :
                  (ND>=128)? 16 :
                  (ND>=64)?   8 :
                  (ND>=32)?   4 :
                  (ND>=16)?   2 : 1;

  // input side
  input           iclk;
  input           irst;
  input           ival;
  input  [ND-1:0] idat;
  output [1:0]    irdy;

  // output side
  input           oclk;
  input           orst;
  input           oena;
  input           onxt;
  input  [31:0]   odma;
  input  [19:0]   oadr;
  input  [19:0]   olen;
  output [MD-1:0] odat;
  output [1:0]    ordy;

  // fixed addressing for the streaming input port
  wire        inxt;
  wire [5:0]  iadr;
  wire [15:0] ilen;
  wire [31:0] idma;
  assign inxt = ival;
  assign iadr = NB;
  assign ilen = ILEN;
  assign idma = 0;

  aiaoNxM #(.ND(ND), .MD(MD), .NR(NR), .BX(BM),
            .NFLG(NFLG|`BSTREAM), .MFLG(MFLG)) bif (
      .vclk(iclk), .vrst(irst), .vrdy(irdy), .vval(ival), .vdma(idma),
      .vnxt(inxt), .vadr(iadr), .vlen(ilen), .vdat(idat),
      .wclk(oclk), .wrst(orst), .wrdy(ordy), .wena(oena), .wdma(odma),
      .wnxt(onxt), .wadr(oadr), .wlen(olen), .wdat(odat));

endmodule


// Array-in / stream-out buffer.
// Wraps aiaoNxM with the output side forced into stream mode (MFLG|`BSTREAM):
// the output "next" strobe follows oena, the output address step is the
// output word size in bytes (MB), the output length is the constant OLEN and
// the output DMA word is zero. The input side is passed straight through.
module aifoNxM (iclk,irst, irdy,ival,idma,inxt,iadr,ilen,idat,
                oclk,orst, ordy,oena,odat);
  parameter ND=8;       // input data width in bits
  parameter MD=8;       // output data width in bits
  parameter NR=0;       // passed to aiaoNxM NR
  parameter BM=3;       // passed to aiaoNxM BX
  parameter NFLG=0;     // input side flags
  parameter MFLG=0;     // output side flags

  localparam OLEN = 512;
  // output word size in bytes, rounded down to a power of two (max 32)
  localparam MB = (MD>=256)? 32 :
                  (MD>=128)? 16 :
                  (MD>=64)?   8 :
                  (MD>=32)?   4 :
                  (MD>=16)?   2 : 1;

  // input side
  input           iclk;
  input           irst;
  input           ival;
  input           inxt;
  input  [31:0]   idma;
  input  [19:0]   iadr;
  input  [19:0]   ilen;
  input  [ND-1:0] idat;
  output [1:0]    irdy;

  // output side
  input           oclk;
  input           orst;
  input           oena;
  output [MD-1:0] odat;
  output [1:0]    ordy;

  // fixed addressing for the streaming output port
  wire        onxt;
  wire [5:0]  oadr;
  wire [15:0] olen;
  wire [31:0] odma;
  assign onxt = oena;
  assign oadr = MB;
  assign olen = OLEN;
  assign odma = 0;

  aiaoNxM #(.ND(ND), .MD(MD), .NR(NR), .BX(BM),
            .NFLG(NFLG), .MFLG(MFLG|`BSTREAM)) bif (
      .vclk(iclk), .vrst(irst), .vrdy(irdy), .vval(ival), .vdma(idma),
      .vnxt(inxt), .vadr(iadr), .vlen(ilen), .vdat(idat),
      .wclk(oclk), .wrst(orst), .wrdy(ordy), .wena(oena), .wdma(odma),
      .wnxt(onxt), .wadr(oadr), .wlen(olen), .wdat(odat));

endmodule


// Array-in / array-out dual-clock buffer built on one dpram.
//
// The v port writes (dpram port A, `WONLY) and the w port reads (dpram port B,
// `RONLY). Each side keeps a byte counter (vcnt / wcnt) that is one bit wider
// than the RAM byte address; the counter advances on vnxt / wnxt and the RAM
// address is the counter itself (stream mode) or counter + offset (array mode).
//
// Ports (per side, shown for v; w is symmetric with wena in place of vval):
//   vclk, vrst   clock and synchronous counter/flag reset
//   vrdy[0]      ready flag derived from the counter difference
//   vrdy[1]      phase output of the aiaotrac instance on the OTHER side
//   vval         data valid; forces all byte enables on
//   vdma         32-bit DMA word: supplies the address offset when none of
//                vstr/vndm/vval/vnxt is set, and its top NB bits supply the
//                byte enables when vval is low
//   vnxt         advance the counter
//   vadr, vlen   byte offset and byte length
//   vdat         write data (wdat is read data on the w side)
module aiaoNxM (vclk,vrst, vrdy,vval,vdma,vnxt,vadr,vlen,vdat, 
                wclk,wrst, wrdy,wena,wdma,wnxt,wadr,wlen,wdat);
  parameter ND=8;	// data width in bits
  parameter MD=8;	// data width in bits
  parameter NR=0;	// number of 1K rams
  parameter BX=4;	// address bits in comparison 
  parameter NFLG=0;	// N flags
  parameter MFLG=0;	// M flags

  // RAM size in KB: NR when given, otherwise derived from the wider port
  localparam NKR=(NR>0)? NR : (ND>288 || MD>288)? 32 : (ND>144 || MD>144)? 16 : (ND>72 || MD>72)? 8 : (ND>36 || MD>36)? 4 : 2;
  localparam LRS=(NKR>128)? 21 : (NKR>64)? 20 : (NKR>32)? 19 : (NKR>16)? 18 : (NKR>8)? 17 : (NKR>4)? 16 : (NKR>2)? 15 : 14; // log2 of ram size
  localparam LND=(ND>=256)? 5 : (ND>=128)? 4 : (ND>=64)? 3 : (ND>=32)? 2 : (ND>=16)? 1 : 0; 	// LND|LMD is log2 of port widths in bytes
  localparam LMD=(MD>=256)? 5 : (MD>=128)? 4 : (MD>=64)? 3 : (MD>=32)? 2 : (MD>=16)? 1 : 0;
  localparam NB=(1<<LND);	// v port width in bytes
  localparam MB=(1<<LMD);	// w port width in bytes
  localparam NA=LRS-3;		// byte address bits (LRS counts bits, hence -3)
  localparam MA=LRS-3;
  localparam LA=NA;
  localparam BM=(BX>0)? BX:LA;
  localparam PAC=(LRS-`LPKTS);

  input vrst, vclk, vval, vnxt;
  input wrst, wclk, wena, wnxt;
  input [31:0] vdma, wdma;
  output [1:0] vrdy, wrdy;

  input [NA-1:0] vadr,vlen;	// byte addresses
  input [MA-1:0] wadr,wlen;
  input  [ND-1:0] vdat;
  output [MD-1:0] wdat;

  wire L=0, H=1;

  wire vndm = (NFLG&`BDMA)? L : H;		// noDMA mode
  wire vstr = (NFLG&`BSTREAM)? H : L;		// treat as stream
  reg  [LA:0] vcnt,vcntn;			// 2x address for counter logic
  wire [LA:0] vadrx = (vstr||vndm||vval||vnxt)? vadr:vdma;	// offset: vadr, or vdma when idle in DMA mode
  wire [LA:0] vcnta = vcnt+vadrx;		// counter + offset
  wire [LA:0] vcntl = vcnt+vlen;		// counter + block length

  wire wndm = (MFLG&`BDMA)? L : H;		// noDMA mode
  wire wstr = (MFLG&`BSTREAM)? H : L;		// treat as stream
  reg  [LA:0] wcnt,wcntn;			// 2x address for counter logic
  wire [LA:0] wadrx = (wstr||wndm||wena||wnxt)? wadr:wdma;
  wire [LA:0] wcnta = wcnt+wadrx;
  wire [LA:0] wcntl = wcnt+wlen;

  // vok: (vcnt+vlen)-wcnt has its top bit clear
  // wok: vcnt-(wcnt+wlen) has its top bit clear and is non-zero
  wire [LA:0] idif = vcntl-wcnt; wire vok = !idif[LA];
  wire [LA:0] odif = vcnt-wcntl; wire wok = !odif[LA] && (odif!=0);

  reg[2:0] vrdx,wrdx;	// last three samples of vok / wok in the local clock domain
  reg vrdz,wrdz;	// ready flags: set after three good samples, cleared after three bad ones

  always @(posedge vclk) begin
    // no-DMA non-stream mode steps by the length, otherwise by the offset
    if (vrst) vcnt <= 0; else if (vnxt) vcnt <= (vndm&!vstr)? vcntl:vcnta;
    vrdx <= {vok,vrdx[2:1]};
    if (vrst) vrdz <= 0; else vrdz <= vrdz? (vrdx!=3'h0) : (vrdx==3'h7);
  end
  always @(posedge wclk) begin
    if (wrst) wcnt <= 0; else if (wnxt) wcnt <= (wndm&!wstr)? wcntl:wcnta;
    wrdx <= {wok,wrdx[2:1]};
    if (wrst) wrdz <= 0; else wrdz <= wrdz? (wrdx!=3'h0) : (wrdx==3'h7);
  end

  // RAM byte addresses. Declared ahead of the aiaotrac instances below, which
  // take part-selects of them: Verilog requires a net to be declared before
  // it is referenced.
  wire [NA-1:0] vaddr = vstr? vcnt : vcnta;
  wire [MA-1:0] waddr = wstr? wcnt : wcnta;

  // Bit 1 of each ready bus carries the phase tracked on the opposite side.
  assign vrdy[0] = vrdz; aiaotrac ftv (vclk,vrst,vaddr[NA-PAC+2:NA-PAC],wrdy[1]);
  assign wrdy[0] = wrdz; aiaotrac ftw (wclk,wrst,waddr[MA-PAC+2:MA-PAC],vrdy[1]);

  wire [ND-1:0] vdum;		// unused read data of the write-only port
  wire [MD-1:0] wdum=0;		// unused write data of the read-only port
  
  // byte write enables: all ones while vval, otherwise the top NB bits of vdma
  wire [NB-1:0] vena = vval? ~0 : vdma[32-1:32-NB];

  // word addresses drop the low LND / LMD byte-in-word bits
  dpram #(NKR,ND,MD,NFLG|`WONLY,MFLG|`RONLY) dps (
      vclk, H, vaddr[NA-1:LND], vena, vdat, vdum,
      wclk, H, waddr[MA-1:LMD], L,    wdum, wdat);

endmodule

// Dual-clock FIFO: ND-bit writes on the v port, MD-bit reads on the w port.
// Two fifoaddr instances generate the write (dir=1) and read (dir=0)
// addresses into one dpram; each sees the other's counter for its flags.
// virdy[0] / wordy[0] are the fifoaddr ready flags; virdy[1] / wordy[1] carry
// the fifotrac phase of the opposite side.
module fifoNxM (vclk,vrst, virdy,vival,vidat,
                wclk,wrst, wordy,woena,wodat);
  parameter ND=8;       // write port width in bits
  parameter MD=8;       // read port width in bits
  parameter DP=0;       // number of 2K rams, 0 = derive from port widths
  parameter BM=3;       // address bits used in the flag comparison
  parameter NFLG=0;     // write side flags
  parameter MFLG=0;     // read side flags

  // num 2K rams
  localparam NR  = (DP>0)? DP :
                   (ND>288 || MD>288)? 16 :
                   (ND>144 || MD>144)?  8 :
                   (ND>72  || MD>72)?   4 :
                   (ND>36  || MD>36)?   2 : 1;
  // log2 of ram size
  localparam LRS = (NR>64)? 21 : (NR>32)? 20 : (NR>16)? 19 : (NR>8)? 18 :
                   (NR>4)?  17 : (NR>2)?  16 : (NR>1)?  15 : 14;
  // log2 of the port widths
  localparam LND = (ND>=512)? 9 : (ND>=256)? 8 : (ND>=128)? 7 : (ND>=64)? 6 : (ND>=32)? 5 :
                   (ND>=16)?  4 : (ND>=8)?   3 : (ND>=4)?   2 : (ND>=2)?  1 : 0;
  localparam LMD = (MD>=512)? 9 : (MD>=256)? 8 : (MD>=128)? 7 : (MD>=64)? 6 : (MD>=32)? 5 :
                   (MD>=16)?  4 : (MD>=8)?   3 : (MD>=4)?   2 : (MD>=2)?  1 : 0;
  // word address bits per port
  localparam NA  = LRS-LND;
  localparam MA  = LRS-LMD;
  // compare width handed to fifoaddr: packet based when `PACKET is set
  localparam NAC = (NFLG&`PACKET)? LRS-`LPKTS : BM;
  localparam MAC = (MFLG&`PACKET)? LRS-`LPKTS : BM;
  localparam PAC = (LRS-`LPKTS);

  input           vclk;
  input           vrst;
  input           vival;
  input  [ND-1:0] vidat;
  output [1:0]    virdy;

  input           wclk;
  input           wrst;
  input           woena;
  output [MD-1:0] wodat;
  output [1:0]    wordy;

  wire L=0, H=1;

  // unused directions of each port
  wire [ND-1:0] vodat;
  wire [MD-1:0] widat;
  wire          vordy;
  wire          wirdy;
  wire          voena;
  wire          wival;
  assign voena = L;
  assign wival = L;

  wire          vena, viena;
  wire          wena, wiena;
  wire [NA-1:0] vcnt;
  wire [MA-1:0] wcnt;

  // write side address generator
  fifoaddr #(.NA(NA), .MA(MA), .BM(NAC), .NFLG(NFLG)) vfa (
      .vclk(vclk), .vrst(vrst),
      .virdy(virdy[0]), .vival(vival),
      .vordy(vordy),    .voena(voena),
      .vena(vena), .viena(viena), .vcntx(vcnt), .wcnt(wcnt),
      .dir(H));
  fifotrac ftv (vclk, vrst, vcnt[NA-PAC+2:NA-PAC], wordy[1]);

  // read side address generator
  fifoaddr #(.NA(MA), .MA(NA), .BM(MAC), .NFLG(MFLG)) wfa (
      .vclk(wclk), .vrst(wrst),
      .virdy(wirdy),    .vival(wival),
      .vordy(wordy[0]), .voena(woena),
      .vena(wena), .viena(wiena), .vcntx(wcnt), .wcnt(vcnt),
      .dir(L));
  fifotrac ftw (wclk, wrst, wcnt[MA-PAC+2:MA-PAC], virdy[1]);

  dpram #(NR*2, ND, MD, NFLG|`WONLY, MFLG|`RONLY) dps (
      vclk, vena, vcnt, viena, vidat, vodat,
      wclk, wena, wcnt, wiena, widat, wodat);

endmodule

// Phase tracker for a 3-bit address slice.
//   clk, rst  clock and reset (reset is sampled into a 3-deep history)
//   adr       3-bit address field being watched
//   phase     cleared once rst has been high for three consecutive samples,
//             set to 1 when the history reads 1,0,0 (oldest first), and
//             otherwise toggled whenever adr steps from 1 to 2
// The empty "#()" parameter list was dropped: it is SystemVerilog-only syntax
// and the module has no parameters.
module fifotrac (clk,rst,adr,phase);
input clk,rst;
input [2:0] adr;
output reg phase;
reg [2:0] rsts,ladr;	// rsts: last three rst samples; ladr: adr from the previous clock
always @(posedge clk) begin
  ladr <= adr;
  rsts <= {rsts[1:0],rst};	// shift in the newest sample, drop the oldest
  if (rsts==7) phase <= 0; else if (rsts==4) phase <= 1; else if (ladr==1 && adr==2) phase <= !phase;
end
endmodule

// Phase tracker for a 3-bit address slice (array buffer variant).
//   clk, rst  clock and reset (reset is sampled into a 3-deep history)
//   adr       3-bit address field being watched
//   phase     cleared once rst has been high for three consecutive samples,
//             set to 1 when the history reads 1,0,0 (oldest first), and
//             otherwise toggled whenever adr crosses from below 4 to 4 or more
// The empty "#()" parameter list was dropped: it is SystemVerilog-only syntax
// and the module has no parameters.
module aiaotrac (clk,rst,adr,phase);
input clk,rst;
input [2:0] adr;
output reg phase;
reg [2:0] rsts,ladr;	// rsts: last three rst samples; ladr: adr from the previous clock
always @(posedge clk) begin
  ladr <= adr;
  rsts <= {rsts[1:0],rst};	// shift in the newest sample, drop the oldest
  if (rsts==7) phase <= 0; else if (rsts==4) phase <= 1; else if (ladr<4 && adr>=4) phase <= !phase;
end
endmodule

// Output-enable gate driven by a sequence error count.
//   ena      (registered) turns the gating on; when off woenb follows woena
//   seqerrs  error count to catch up with
//   woena    incoming output enable, also counted in 64-beat blocks
//   woenb    woena, suppressed while a "cycle" block is active
// At the end of each 64-beat block the module decides whether the next block
// is suppressed (seqerrs differs from the internal fixerrs count); halfway
// through a suppressed block fixerrs is advanced by one.
module fifoseqfill (clk,rst, ena,seqerrs, woena,woenb);
  input        clk;
  input        rst;
  input        ena;
  input  [3:0] seqerrs;
  input        woena;
  output       woenb;

  reg       useit;      // registered ena
  reg       needit;     // registered (seqerrs != fixerrs)
  reg       cycle;      // current 64-beat block is being suppressed
  reg [5:0] wocnt;      // woena beat counter, wraps at 64
  reg [3:0] fixerrs;    // number of suppressed blocks so far (mod 16)

  wire block_mid = (wocnt==31);
  wire block_end = (wocnt==63);
  wire decide    = woena && block_end;
  wire update    = woena && block_mid && cycle;

  // free-running compare, not reset
  always @(posedge clk)
    needit <= (seqerrs!=fixerrs);

  always @(posedge clk) begin
    if (rst) begin
      useit   <= 0;
      wocnt   <= 0;
      cycle   <= 0;
      fixerrs <= 0;
    end
    else begin
      useit <= ena;
      if (woena)  wocnt   <= wocnt+1;
      if (decide) cycle   <= needit;
      if (update) fixerrs <= fixerrs+1;
    end
  end

  assign woenb = woena && !(useit && cycle);

endmodule

// bidirectional fifo (dir=1 vi-to-wo) (dir=0 wi-to-vo)
// Bidirectional FIFO over one dpram.
//   dir=1 : v port writes, w port reads
//   dir=0 : w port writes, v port reads
//   bifi  : selects the v-side readback address generator (xfa) for the
//           v-side flags: virdx additionally requires xirdy, vordx = xordy
// The v-side RAM address is the write counter while vival is high and the
// readback counter otherwise.
module bififoNxM (vclk,vrst, virdx,vival,vidat, vordx,voena,vodat,
                  wclk,wrst, wirdy,wival,widat, wordy,woena,wodat, dir,bifi);
  parameter ND=8;       // v port data width in bits
  parameter MD=8;       // w port data width in bits
  parameter DP=0;       // depth of 2K rams
  parameter BM=4;       // address bits in comparison
  parameter NFLG=0;     // v port flags
  parameter MFLG=0;     // w port flags

  // num 2K rams
  localparam NR  = (DP>0)? DP :
                   (ND>288 || MD>288)? 16 :
                   (ND>144 || MD>144)?  8 :
                   (ND>72  || MD>72)?   4 :
                   (ND>36  || MD>36)?   2 : 1;
  // log2 of ram size
  localparam LRS = (NR>64)? 21 : (NR>32)? 20 : (NR>16)? 19 : (NR>8)? 18 :
                   (NR>4)?  17 : (NR>2)?  16 : (NR>1)?  15 : 14;
  // log2 of the port widths
  localparam LND = (ND>=512)? 9 : (ND>=256)? 8 : (ND>=128)? 7 : (ND>=64)? 6 : (ND>=32)? 5 :
                   (ND>=16)?  4 : (ND>=8)?   3 : (ND>=4)?   2 : (ND>=2)?  1 : 0;
  localparam LMD = (MD>=512)? 9 : (MD>=256)? 8 : (MD>=128)? 7 : (MD>=64)? 6 : (MD>=32)? 5 :
                   (MD>=16)?  4 : (MD>=8)?   3 : (MD>=4)?   2 : (MD>=2)?  1 : 0;
  localparam NA  = LRS-LND;
  localparam MA  = LRS-LMD;
  localparam NAC = (NFLG&`PACKET)? LRS-`LPKTS : BM;
  localparam MAC = (MFLG&`PACKET)? LRS-`LPKTS : BM;

  // v port
  input           vclk;
  input           vrst;
  input           vival;
  input           voena;
  input  [ND-1:0] vidat;
  output [ND-1:0] vodat;
  output          virdx;
  output          vordx;

  // w port
  input           wclk;
  input           wrst;
  input           wival;
  input           woena;
  input  [MD-1:0] widat;
  output [MD-1:0] wodat;
  output          wirdy;
  output          wordy;

  input           dir;
  input           bifi;

  wire L=0, H=1;

  wire          vena, viena;
  wire          wena, wiena;
  wire          xena, xiena;
  wire [NA-1:0] vcnt;
  wire [NA-1:0] xcnt;
  wire [MA-1:0] wcnt;
  wire          virdy, vordy;
  wire          xirdy, xordy;

  // main address generators; the two sides always run in opposite directions
  fifoaddr #(NA,MA,NAC,NFLG) vfa (
      vclk,vrst, virdy,vival, vordy,voena, vena,viena,vcnt,wcnt, dir);
  fifoaddr #(MA,NA,MAC,MFLG) wfa (
      wclk,wrst, wirdy,wival, wordy,woena, wena,wiena,wcnt,vcnt, !dir);

  // this is the readback mode when dir=1
  fifoaddr #(NA,NA,NAC,`FLOOP) xfa (
      vclk,vrst, xirdy,L, xordy,voena, xena,xiena,xcnt,vcnt, L);

  dpram #(NR*2,ND,MD,NFLG,MFLG) dps (
      vclk, vena, vival?vcnt:xcnt, viena, vidat, vodat,
      wclk, wena, wcnt,            wiena, widat, wodat);

  assign virdx = bifi? (virdy && xirdy) : virdy;
  assign vordx = bifi? xordy : vordy;

endmodule

// fifo address generator
// One side of a FIFO: local word counter plus ready flags computed from the
// difference between the other side's counter and the local one.
//   vclk, vrst    clock (optionally inverted by `CINV) and asynchronous reset
//   vival/voena   input valid / output enable; the one matching dir advances the counter
//   virdy/vordy   input-ready / output-ready flags
//   vena, viena   RAM enable and RAM write enable for this port
//   vcntx         RAM address for this port
//   wcnt          the other side's counter (from the other clock domain)
//   dir           1 = this side writes, 0 = this side reads
module fifoaddr (vclk,vrst, virdy,vival, vordy,voena, vena,viena,vcntx,wcnt, dir);

  parameter NA=8;
  parameter MA=8;
  parameter BM=4;
  parameter NFLG=0;
  localparam BMAX=(1<<BM);
  // AFN: difference threshold used by the flags, selected by the mode flags
  localparam AFN=(NFLG&`DBLBUF)? BMAX/2+1 : (NFLG&`NEMPTY)? 4 : (NFLG&`TIGHT)? 1 : (NFLG&`LOOSE)? 3 : 2;
  // NAC: number of top address bits compared; MAC clamps it to the other side's width
  localparam NAC=(NFLG&`NEMPTY)? NA : (BM>0)? BM : (MA>NA)? NA : MA;
  localparam MAC=(NAC>MA)? MA : NAC;
  localparam IAFN=BMAX-AFN;
  localparam RAH=(NFLG&`RAHEAD)? 1:0;
  localparam B16=(NFLG&`BURST16)? 1:0;

  input vclk,vrst;
  input vival,voena;
  output vena,viena;
  output virdy,vordy;
  output [NA-1:0] vcntx;
  input [MA-1:0] wcnt;
  input dir;	// 1 = input, 0 = output

  wire L=0, H=1;
  reg [NA-1:0] vcnt;
  reg [NAC-1:0] wtop,vdif;
  reg virdy,vordx,voend,wstable;
  wire [NAC-1:0] afn=(NFLG&`DBLBUF)? AFN+voend : AFN;	// increment needed blocks until read addr catches up if currently reading
  wire vval = dir? vival : voena;
  wire floop= (NFLG&`FLOOP)? H : L;
  wire vclx = (NFLG&`CINV)? ~vclk : vclk;
  // voenc comes from a delaypipe #(5) fed with voena (module defined elsewhere;
  // presumably a delayed copy of voena -- TODO confirm)
  wire voenc; delaypipe #(5) vd (vclx,voena,voenc);

  always @(posedge vclx) begin
    wstable <= (NAC>=MA)? H : !wcnt[MA-NAC-1];		// the wtop bits are stable while the next lower bit is not high
    if (wstable) wtop <= (NAC>=MA)? wcnt<<(NAC-MA) : wcnt[MA-1:MA-MAC];
    // difference between the other side's top bits and the local top bits
    vdif  <= wtop - vcnt[NA-1:NA-NAC];
    voend <= voena || voenc;
  end

  wire [NA-1:0] vcntp = vcnt+1;
  always @(posedge vclx or posedge vrst) begin
    if (vrst) vcnt  <= 0; else if (vval) vcnt <= vcntp;
    // input ready: loop mode uses vdif<IAFN; otherwise only when writing and vdif is above AFN or zero
    if (vrst) virdy <= 0; else virdy <= floop? (vdif<IAFN) : dir && ((vdif>AFN) || (vdif==0));
    // output ready: only when reading; packet modes also accept any non-zero vdif while not recently enabled
    if (vrst) vordx <= 0; else vordx <= !dir && ( (vdif>=afn) || ((NFLG&(`PACKET|`FPKT)) && (vdif>0) && !voend) );  
  end

  assign viena = dir && vival;
  assign vena  = (NFLG&`CENA)? vval : H;
  // with `RAHEAD the address presented is vcnt+1 while vval is high;
  // with `BURST16 as well only the low 4 bits take the incremented value
  assign vcntx = (RAH&B16)? {vcnt[NA-1:4],vval?vcntp[3:0]:vcnt[3:0]} : (RAH&vval)? vcntp : vcnt;

 generate
 if (NFLG&`NEMPTY) begin 
  // NEMPTY mode must have voena <= vordy && localOK
  // vordy is also raised from the 2 LSBs of the difference: more than one
  // word while voena is high, at least one word otherwise
  wire [1:0] vdif_ = wtop[1:0] - vcnt[1:0];
  assign vordy = vordx || (voena? (vdif_>1) : (vdif_>0) );
 end
 else begin
  assign vordy = vordx;
 end
 endgenerate

endmodule

// vdif_	5 4 3 2 1 0	5 4 3 2 1
// vdif 	6 5 4 3 2 1 0	6 5 4 3 2
// vodry	1 1 1 1 0 0	
// voena	1 1 1 1 1 0
// vordy_	1 1 1 1 0

// dualchannel fifo 
// Dual-channel FIFO: two independent channels share one dpram, each owning
// half of the address space (top address bit = channel number).
//   vival[1:0] / virdy[1:0]   per-channel write valid / ready
//   woena[1:0] / wordy[1:0]   per-channel read enable / ready
//   woena[2]                  selects the channel-1 half of the read address
//                             without driving a channel read enable
module dcfifoNxM (vclk,vrst, virdy,vival,vidat,
                  wclk,wrst, wordy,woena,wodat);
  parameter ND=8;       // write data width in bits
  parameter MD=8;       // read data width in bits
  parameter DP=0;       // depth of 2K rams
  parameter BM=4;       // address bits in comparison
  parameter NFLG=0;     // write side flags
  parameter MFLG=0;     // read side flags

  // num 2K rams
  localparam NR  = (DP>0)? DP :
                   (ND>288 || MD>288)? 16 :
                   (ND>144 || MD>144)?  8 :
                   (ND>72  || MD>72)?   4 :
                   (ND>36  || MD>36)?   2 : 1;
  // log2 of ram size
  localparam LRS = (NR>64)? 21 : (NR>32)? 20 : (NR>16)? 19 : (NR>8)? 18 :
                   (NR>4)?  17 : (NR>2)?  16 : (NR>1)?  15 : 14;
  // log2 port width
  localparam LND = (ND>=512)? 9 : (ND>=256)? 8 : (ND>=128)? 7 : (ND>=64)? 6 : (ND>=32)? 5 :
                   (ND>=16)?  4 : (ND>=8)?   3 : (ND>=4)?   2 : (ND>=2)?  1 : 0;
  localparam LMD = (MD>=512)? 9 : (MD>=256)? 8 : (MD>=128)? 7 : (MD>=64)? 6 : (MD>=32)? 5 :
                   (MD>=16)?  4 : (MD>=8)?   3 : (MD>=4)?   2 : (MD>=2)?  1 : 0;
  localparam NA  = LRS-LND;
  localparam MA  = LRS-LMD;
  localparam NAC = (NFLG&`PACKET)? LRS-`LPKTS : BM;
  localparam MAC = (MFLG&`PACKET)? LRS-`LPKTS : BM;

  input           vclk;
  input           vrst;
  input  [1:0]    vival;
  input  [ND-1:0] vidat;
  output [1:0]    virdy;

  input           wclk;
  input           wrst;
  input  [2:0]    woena;        // woena[2] is peek at port=2 data
  output [1:0]    wordy;
  output [MD-1:0] wodat;

  wire L=0, H=1;

  // unused directions of each port
  wire [1:0]    vordy;
  wire [1:0]    wirdy;
  wire [1:0]    voena;
  wire [1:0]    wival;
  wire [ND-1:0] vodat;
  wire [MD-1:0] widat;
  assign voena = 0;
  assign wival = 0;

  wire vena0, viena0, vena1, viena1;
  wire wena0, wiena0, wena1, wiena1;
  wire [NA-1:0] vcnt0, vcnt1;
  wire [MA-1:0] wcnt0, wcnt1;

  // channel 0
  fifoaddr #(NA-1,MA-1,NAC-1,NFLG) vfa0 (
      vclk,vrst, virdy[0],vival[0], vordy[0],voena[0], vena0,viena0,vcnt0,wcnt0, H);
  fifoaddr #(MA-1,NA-1,MAC-1,MFLG) wfa0 (
      wclk,wrst, wirdy[0],wival[0], wordy[0],woena[0], wena0,wiena0,wcnt0,vcnt0, L);

  // channel 1
  fifoaddr #(NA-1,MA-1,NAC-1,NFLG) vfa1 (
      vclk,vrst, virdy[1],vival[1], vordy[1],voena[1], vena1,viena1,vcnt1,wcnt1, H);
  fifoaddr #(MA-1,NA-1,MAC-1,MFLG) wfa1 (
      wclk,wrst, wirdy[1],wival[1], wordy[1],woena[1], wena1,wiena1,wcnt1,vcnt1, L);

  // merged write port: channel 1 wins the address mux when its valid is high
  wire          vena;
  wire          viena;
  wire [NA-1:0] vcnt;
  assign vena  = vena0  || vena1;
  assign viena = viena0 || viena1;
  assign vcnt  = vival[1]? {H,vcnt1[NA-2:0]} : {L,vcnt0[NA-2:0]};

  // merged read port: channel 1 half is addressed for woena[1] or woena[2]
  wire          wena;
  wire          wiena;
  wire [MA-1:0] wcnt;
  assign wena  = wena0  || wena1;
  assign wiena = wiena0 || wiena1;
  assign wcnt  = (woena[1]|woena[2])? {H,wcnt1[MA-2:0]} : {L,wcnt0[MA-2:0]};

  dpram #(NR*2,ND,MD,NFLG|`WONLY,MFLG|`RONLY) dps (
      vclk, vena, vcnt, viena, vidat, vodat,
      wclk, wena, wcnt, wiena, widat, wodat);

endmodule

// Corner-turn FIFO address generator: produces a permuted write address on
// the v side and a linear read address on the w side, plus ready flags
// derived from the difference of the two block counters.
//   ctena          not referenced inside this module
//   ctdual, ctsize registered into dual / isz; select block length and shift
//   v side         vival advances the write sequence; vadr/vena/viena drive
//                  the RAM write port; virdy is the write-ready flag
//   w side         woena advances the read counter; wadr/wena/wiena drive the
//                  RAM read port; wordy is the read-ready flag
//   voena, wival   not referenced; vordy and wirdy are tied low because the
//                  v side never reads and the w side never writes
module ctfifoaddr (ctena,ctdual,ctsize,
  vclk,vrst, virdy,vival, vordy,voena, vena,viena,vadr,
  wclk,wrst, wirdy,wival, wordy,woena, wena,wiena,wadr);

  parameter NA=8;
  parameter MA=8;
  parameter BM=4;

  parameter NW=256;
  parameter MW=128;
  localparam OLEN=2*256/(MW/8);	// read look-ahead added to wcnt for the output flag

  input ctena,ctdual;
  input [1:0] ctsize;

  input vclk,vrst;
  input vival,voena;
  output vena,viena;
  output virdy,vordy;
  output [NA-1:0] vadr;

  input wclk,wrst;
  input wival,woena;
  output wena,wiena;
  output wirdy,wordy;
  output [MA-1:0] wadr;

  // counters carry one extra bit above the address width
  reg [NA:0] vcnt,vcntl,icnt,isize,ilen;
  reg [MA:0] wcnt,wcntl;
  reg [1:0] isz;
  reg dual,phase;
  wire izer = vival && (icnt==isize);		// last write of a block
  wire jzer = izer && (!dual || phase);		// block base advances (every second block in dual mode)
  // flags compare only the top BM+1 bits of the counters
  wire [BM:0] idif = vcntl[NA:NA-BM]-wcnt[MA:MA-BM]; wire vok = !idif[BM];
  wire [BM:0] odif = vcnt[NA:NA-BM]-wcntl[MA:MA-BM]; wire wok = !odif[BM] && (odif!=0);
  reg [2:0] vrdx,wrdx; reg vrdy,wrdy;
  wire [NA:0] ctlen = 16'h2000 >> (3'h5-isz);       // 3 for isz and 8 samples per line

  always @(posedge vclk) begin
    dual <= ctdual;
    isz  <= ctsize;
    ilen <= ctlen<<dual;
    isize <= ctlen-1;
    if (vrst|!dual) phase <= 0; else if (izer) phase <= !phase;
    if (vrst|izer) icnt <= 0; else if (vival) icnt <= icnt+1;
    if (vrst) vcnt <= 0; else if (jzer) vcnt <= vcnt+ilen;
    vcntl <= vcnt+ilen;
    // ready only after three consecutive good samples
    vrdx <= {vok,vrdx[2:1]};
    vrdy <= (vrdx==3'h7) && !vrst;
  end
  always @(posedge wclk) begin
    if (wrst) wcnt <= 0; else if (woena) wcnt <= wcnt+1;
    wcntl <= wcnt + OLEN;
    wrdx <= {wok,wrdx[2:1]};
    wrdy <= (wrdx==3'h7) && !wrst;
  end
  // write offset inside the block: low 3 count bits (plus phase) move to the
  // top, the remaining count bits stay at the bottom
  wire [NA:0] vblk = {icnt[2:0],phase} << (4'h4+dual+isz);
  wire [NA:0] voff = vblk | icnt[NA:3];
`ifdef RAMDBG
  always @(posedge vclk) if (vrst|viena) $write("VCNT = %x   %x:%x   %x:%x  %d:%d %x\n",icnt,vcnt,vadr, vblk,voff, phase,jzer, idif);
  always @(posedge wclk) if (wrst) $write("WCNT = %x %x  %x\n",wcnt,wcnt+1,odif);
`endif
  assign vadr = vcnt+voff;
  assign vena = 1;
  assign viena = vival;
  assign virdy = vrdy;
  assign vordy = 1'b0;	// v side is write-only: previously left undriven
  assign wadr = wcnt;
  assign wena = 1;
  assign wiena = 0;
  assign wirdy = 1'b0;	// w side is read-only: previously left undriven
  assign wordy = wrdy;

endmodule


// multichannel fifo 
// Multichannel FIFO: three channels share one dpram.
// Channel 0 owns the lower half of the address space (top bit 0); channels 1
// and 2 own one quarter each (top bits 10 and 11). The merged write enable,
// write address and write data are registered once on vclk before the RAM.
module mcfifoNxM (vclk,vrst, virdy,vival,vidata,
                  wclk,wrst, wordy,woena,wodat);
  parameter ND=8;       // write data width in bits
  parameter MD=8;       // read data width in bits
  parameter DP=0;       // depth of 2K rams
  parameter BM=4;       // address bits in comparison
  parameter NFLG=0;     // write side flags
  parameter MFLG=0;     // read side flags

  // num 2K rams
  localparam NR  = (DP>0)? DP :
                   (ND>288 || MD>288)? 16 :
                   (ND>144 || MD>144)?  8 :
                   (ND>72  || MD>72)?   4 :
                   (ND>36  || MD>36)?   2 : 1;
  // log2 of ram size
  localparam LRS = (NR>128)? 22 : (NR>64)? 21 : (NR>32)? 20 : (NR>16)? 19 : (NR>8)? 18 :
                   (NR>4)?   17 : (NR>2)?  16 : (NR>1)?  15 : 14;
  // log2 port width
  localparam LND = (ND>=512)? 9 : (ND>=256)? 8 : (ND>=128)? 7 : (ND>=64)? 6 : (ND>=32)? 5 :
                   (ND>=16)?  4 : (ND>=8)?   3 : (ND>=4)?   2 : (ND>=2)?  1 : 0;
  localparam LMD = (MD>=512)? 9 : (MD>=256)? 8 : (MD>=128)? 7 : (MD>=64)? 6 : (MD>=32)? 5 :
                   (MD>=16)?  4 : (MD>=8)?   3 : (MD>=4)?   2 : (MD>=2)?  1 : 0;
  localparam NA  = LRS-LND;
  localparam MA  = LRS-LMD;
  localparam NAC = (NFLG&`PACKET)? LRS-`LPKTS : BM;
  localparam MAC = (MFLG&`PACKET)? LRS-`LPKTS : BM;

  input           vclk;
  input  [2:0]    vrst;
  input  [2:0]    vival;
  input  [ND-1:0] vidata;
  output [2:0]    virdy;

  input           wclk;
  input  [2:0]    wrst;
  input  [2:0]    woena;
  output [2:0]    wordy;
  output [MD-1:0] wodat;

  wire L=0, H=1;

  // unused directions of each port
  wire [2:0]    vordy;
  wire [2:0]    wirdy;
  wire [2:0]    voena;
  wire [2:0]    wival;
  wire [ND-1:0] vodat;
  wire [MD-1:0] widat;
  assign voena = 0;
  assign wival = 0;

  // per-channel address generator outputs
  wire vena0, viena0, vena1, viena1, vena2, viena2;
  wire wena0, wiena0, wena1, wiena1, wena2, wiena2;
  wire [NA-1:0] vcnt0, vcnt1, vcnt2;
  wire [MA-1:0] wcnt0, wcnt1, wcnt2;

  // for multichannels
  fifoaddr #(NA-1,MA-1,NAC-1,NFLG) vfa0 (
      vclk,vrst[0], virdy[0],vival[0], vordy[0],voena[0], vena0,viena0,vcnt0,wcnt0, H);
  fifoaddr #(MA-1,NA-1,MAC-1,MFLG) wfa0 (
      wclk,wrst[0], wirdy[0],wival[0], wordy[0],woena[0], wena0,wiena0,wcnt0,vcnt0, L);

  // for core1 direct
  fifoaddr #(NA-2,MA-2,NAC-2,NFLG) vfa1 (
      vclk,vrst[1], virdy[1],vival[1], vordy[1],voena[1], vena1,viena1,vcnt1,wcnt1, H);
  fifoaddr #(MA-2,NA-2,MAC-2,MFLG) wfa1 (
      wclk,wrst[1], wirdy[1],wival[1], wordy[1],woena[1], wena1,wiena1,wcnt1,vcnt1, L);

  // for core2 direct
  fifoaddr #(NA-2,MA-2,NAC-2,NFLG) vfa2 (
      vclk,vrst[2], virdy[2],vival[2], vordy[2],voena[2], vena2,viena2,vcnt2,wcnt2, H);
  fifoaddr #(MA-2,NA-2,MAC-2,MFLG) wfa2 (
      wclk,wrst[2], wirdy[2],wival[2], wordy[2],woena[2], wena2,wiena2,wcnt2,vcnt2, L);

  // registered, merged write port (channel 2 has priority, then channel 1)
  reg           vena;
  reg           viena;
  reg [NA-1:0]  vcnt;
  reg [ND-1:0]  vidat;
  always @(posedge vclk) begin
    vidat <= vidata;
    viena <= viena0 || viena1 || viena2;
    vena  <= vena0  || vena1  || vena2;
    vcnt  <= vival[2]? {H,H,vcnt2[NA-3:0]} :
             vival[1]? {H,L,vcnt1[NA-3:0]} :
                       {L,vcnt0[NA-2:0]};
  end

  // combinational, merged read port (same priority order on woena)
  wire          wena;
  wire          wiena;
  wire [MA-1:0] wcnt;
  assign wena  = wena0  || wena1  || wena2;
  assign wiena = wiena0 || wiena1 || wiena2;
  assign wcnt  = woena[2]? {H,H,wcnt2[MA-3:0]} :
                 woena[1]? {H,L,wcnt1[MA-3:0]} :
                           {L,wcnt0[MA-2:0]};

  dpram #(NR*2,ND,MD,NFLG|`WONLY,MFLG|`RONLY) dps (
      vclk, vena, vcnt, viena, vidat, vodat,
      wclk, wena, wcnt, wiena, widat, wodat);

endmodule


// 2-D corner turn + NC chan x NPKTx64by packetizer + FIFO
// Corner-turn packetizer in front of a regular fifoNxM.
// Input samples are written into a double-buffered pair of sdprams at
// {pass,icnt,pcnt} and read back linearly at {!pass,jcnt}; the read data is
// then pushed into the output FIFO. osel counts output packets per channel.
//   opkt_, oraw_, oreal_  mode inputs, registered on iclk before use
//   size                  last channel index while opkt is set (0 otherwise)
//   osel                  output channel select, wraps after reaching sizex
module fifoNxMpkt (iclk,irst, irdy,ival,idat, 
                   oclk,orst, ordy,oena,odat, opkt_,oraw_,oreal_,size,osel);
  parameter ND=32;
  parameter MD=64;
  parameter DP=0;
  parameter BM=3;
  parameter NFLG=0;
  parameter MFLG=0;
  parameter NC=8;		// log2(channels)

  localparam NCHN=(1<<NC);
  localparam NPKT=`DMA_PKT_LEN;	// num 64by chunks per DMA packet
  localparam NPKTB=(NPKT==4)? 2 : (NPKT==2)? 1 : 0;	// log2(NPKT)
  localparam NKB=NCHN*64*2*NPKT/1024;	// two packets per channel, in KB
  localparam NP=NPKTB + ((ND==8)? 6 : (ND==16)? 5 : (ND==32)? 4 : 3);	// bits of the within-packet word counter
  localparam NCP=NC+NP;
  localparam NDH=ND/2;		// each sdpram stores half of the data word
  localparam NKBH=NKB/2;

  input irst, iclk, ival;
  input orst, oclk, oena;
  output irdy, ordy;

  input [ND-1:0] idat;
  output [MD-1:0] odat;

  input oreal_,opkt_,oraw_;
  input [NC-1:0] size;
  output reg [NC-1:0] osel;

  wire L=0, H=1;
  wire [ND-1:0] jdat;
  wire [NP-1:0] full=~0;

  // input ram addressing 
  reg oreal,opkt,oraw;
  reg pass,primed,jval,kval,pha,iq;
  reg [NC-1:0] icnt,sizex;
  reg [NP-1:0] pcnt;
  reg [NCP-1:0] jcnt;
  reg [ND-1:0] kdat;
  wire iinc = ival && (oreal? H    : iq);
  wire izer = iinc && (icnt==sizex);		// channel counter wraps
  wire jinc = ival && pha;
  wire qinc = ival && ((oreal&opkt)? izer : H );
  wire pinc = izer && iq;
  wire jzer = ival && (jcnt=={sizex,full}) && pha;	// read side finished a buffer: swap halves
  wire ival0 = ival && (oraw || !iq);	// write enable for the lower data half
  wire ival1 = ival && (oraw ||  iq);	// write enable for the upper data half
  always @(posedge iclk) begin
    oraw <= oraw_;
    opkt <= opkt_;
    oreal <= oreal_;
    sizex <= opkt? size : 0;
    if (irst) pha <= 0; else if (oraw) pha <= 1; else if (ival) pha  <= !pha;
    if (irst)  iq <= 0; else if (oraw)  iq <= 1; else if (qinc) iq   <= !iq;
    if (irst|izer) icnt <= 0; else if (iinc) icnt <= icnt+1;
    if (irst|jzer) pcnt <= 0; else if (pinc) pcnt <= pcnt+1;
    if (irst|jzer) jcnt <= 0; else if (jinc) jcnt <= jcnt+1;
    // no data is forwarded until the first buffer has been filled
    if (irst)    primed <= 0; else if (jzer) primed <= H;
    if (irst)      pass <= 0; else if (jzer) pass <= !pass;
    // valid is delayed twice and data once after the read address is issued
    jval <= jinc && primed;
    kval <= jval;
    kdat <= jdat;
  end
  wire [NCP:0] iadr = {pass,icnt,pcnt};		// write: channel-major
  wire [NCP:0] jadr = {!pass,jcnt};		// read: linear, opposite half

  // dual port RAM for corner turn
  sdpram #(NKBH,NDH,NDH) dpse (iclk,H, iadr,ival0, idat[NDH-1: 0],   iclk,H, jadr,jdat[NDH-1: 0]);
  sdpram #(NKBH,NDH,NDH) dpso (iclk,H, iadr,ival1, idat[ND-1:NDH],   iclk,H, jadr,jdat[ND-1:NDH]);

  // regular output fifo
  fifoNxM #(ND,MD,DP,BM,NFLG,MFLG) bif (iclk,irst, irdy,kval,kdat, oclk,orst, ordy,oena,odat);

  // output channel select
  // osel advances every 16 output beats when MD==32, every 8 otherwise.
  // NOTE(review): sizex is registered on iclk but compared here on oclk.
  reg [3:0] ocnt;
  wire osena = oena && ((MD==32)? (ocnt[3:0]==15) : (ocnt[2:0]==7));
  wire osrst = orst || (osena && (osel==sizex));
  always @(posedge oclk) begin
    if (orst)  ocnt <= 0; else if (oena) ocnt <= ocnt+1;
    if (osrst) osel <= 0; else if (osena) osel <= osel+1;
  end

endmodule

// Corner-turn packetizer in front of a regular fifoNxM, with a bypass.
// While opkt is set, input samples are written into a double-buffered pair
// of sdprams at {pass,icnt,pcnt} and read back linearly at {!pass,jcnt};
// while opkt is clear, the input goes to the FIFO through one register stage.
//   opkt_  packet mode input, registered on iclk before use
//   size   last channel index while opkt is set (0 otherwise)
//   osel   output channel select, wraps after reaching sizex
module fifoNxMpkt2 (iclk,irst, irdy,ival,idat, 
                    oclk,orst, ordy,oena,odat, opkt_,size,osel);
  parameter ND=32;
  parameter MD=64;
  parameter DP=0;
  parameter BM=3;
  parameter NFLG=0;
  parameter MFLG=0;
  parameter NC=8;		// log2(channels)

  localparam NCHN=(1<<NC);
  localparam NPKT=`DMA_PKT_LEN;	// num 64by chunks per DMA packet
  localparam NPKTB=(NPKT==4)? 2 : (NPKT==2)? 1 : 0;	// log2(NPKT)
  localparam NKB=NCHN*64*2*NPKT/1024;	// two packets per channel, in KB
  localparam NP=NPKTB + ((ND==8)? 6 : (ND==16)? 5 : (ND==32)? 4 : 3);	// bits of the within-packet word counter
  localparam NCP=NC+NP;
  localparam NDH=ND/2;		// each sdpram stores half of the data word
  localparam NKBH=NKB/2;

  input irst, iclk, ival;
  input orst, oclk, oena;
  output irdy, ordy;

  input [ND-1:0] idat;
  output [MD-1:0] odat;

  input opkt_;
  input [NC-1:0] size;
  output reg [NC-1:0] osel;

  wire L=0, H=1;
  wire [ND-1:0] jdat;
  wire [NP-1:0] full=~0;

  // input ram addressing 
  reg opkt;
  reg pass,primed,jval,kval;
  reg [NC-1:0] icnt,sizex;
  reg [NP-1:0] pcnt;
  reg [NCP-1:0] jcnt;
  reg [ND-1:0] kdat;
  wire izer = ival && (icnt==sizex);		// channel counter wraps
  wire jzer = ival && (jcnt=={sizex,full});	// buffer complete: swap halves
  always @(posedge iclk) begin
    opkt <= opkt_;
    sizex <= opkt? size : 0;
    if (irst|izer) icnt <= 0; else if (ival) icnt <= icnt+1;
    if (irst|jzer) pcnt <= 0; else if (izer) pcnt <= pcnt+1;
    if (irst|jzer) jcnt <= 0; else if (ival) jcnt <= jcnt+1;
    if (irst)      pass <= 0; else if (jzer) pass <= !pass;
    // no corner-turned data is forwarded until the first buffer has been filled
    if (irst)    primed <= 0; else if (jzer) primed <= H;
    // packet mode: RAM read data with valid delayed twice; otherwise direct input
    jval <= opkt? ival && primed : L;
    kval <= opkt? jval : ival;
    kdat <= opkt? jdat : idat;
  end
  wire [NCP:0] iadr = {pass,icnt,pcnt};		// write: channel-major
  wire [NCP:0] jadr = {!pass,jcnt};		// read: linear, opposite half

  // dual port RAM for corner turn
  sdpram #(NKBH,NDH,NDH) dpse (iclk,H, iadr,ival, idat[NDH-1: 0],   iclk,H, jadr,jdat[NDH-1: 0]);
  sdpram #(NKBH,NDH,NDH) dpso (iclk,H, iadr,ival, idat[ND-1:NDH],   iclk,H, jadr,jdat[ND-1:NDH]);

  // regular output fifo
  fifoNxM #(ND,MD,DP,BM,NFLG,MFLG) bif (iclk,irst, irdy,kval,kdat, oclk,orst, ordy,oena,odat);

  // output channel select
  // osel advances every 16 output beats when MD==32, every 8 otherwise; the
  // beat counter is held at zero while opkt is clear.
  // NOTE(review): sizex and opkt are registered on iclk but used here on oclk.
  reg [3:0] ocnt;
  wire osena = oena && ((MD==32)? (ocnt[3:0]==15) : (ocnt[2:0]==7));
  wire osrst = orst || (osena && (osel==sizex));
  wire ocrst = orst || !opkt;
  always @(posedge oclk) begin
    if (ocrst) ocnt <= 0; else if (oena)  ocnt <= ocnt+1;
    if (osrst) osel <= 0; else if (osena) osel <= osel+1;
  end

endmodule

// Front end packetizer built on aifoNxM (array in, FIFO out).
// Each input word is written at an address formed from the channel counter
// and a 3-bit pass counter; after the last channel of the eighth pass the
// buffer base is advanced by (sizem1+1)*8 words.
//   opkt    selects channel-major {icnt,ipas} or pass-major {ipas,icnt} addressing
//   sizem1  number of channels minus one
//   osel    upper bits of the output channel counter, updated while opkt is set
module fifoNxMpkt8fe (iclk,irst, irdy,ival,idat, 
                      oclk,orst, ordy,oena,odat, opkt,sizem1,osel);
  parameter ND=32;
  parameter MD=64;
  parameter DP=0;
  parameter BM=3;
  parameter NFLG=0;
  parameter MFLG=0;
  parameter NC=8;		// log2(channels)

  localparam SPB=8;		// samples per block
  localparam IOR=MD/ND;		// not referenced below
  localparam IORS=(IOR==8)? 3 : (IOR==4)? 2 : (IOR==2)? 1 : 0;	// not referenced below
  localparam NKR=(DP>0)? DP/2 : (1<<NC) * (ND/8) * SPB * 2 / 1024;	// at least double buffer
  localparam NP=(ND>=64)?3:(ND>=32)?2:(ND>=16)?1:0;	// log2 of the input word size in bytes

  input irst, iclk, ival;
  input orst, oclk, oena;
  output irdy, ordy;

  input [ND-1:0] idat;
  output [MD-1:0] odat;

  input opkt;
  input [NC-1:0] sizem1;
  output reg [7:0] osel;

  wire L=0, H=1;
  // NOTE(review): with NP=0 (ND<16) this declares [-1:0], i.e. two bits -- confirm ND>=16 is required
  wire [NP-1:0] w2bp=0;		// word to byte addressing append
  wire [2:0] c2bp=0;		// channel to block addressing append

  // input output pass per block
  reg [2:0] ipas,opas;		// opas is not referenced below
  reg [15:0] jpas;		// counts izer events; not read inside this module

  // input addressing
  reg jzer,jval;
  reg [ND-1:0] jdat;
  reg [NC-1:0] icnt;
  reg [NC+3:0] jadr,jlen;
  wire [NC:0] size = sizem1+1;
  wire izer = ival && (icnt==sizem1);	// last channel of a pass
  wire izerp = izer && (ipas==7);	// last channel of the eighth pass
  always @(posedge iclk) begin
    if (irst) ipas <= 0; else if (izer) ipas <= ipas+1;
    if (irst) jpas <= 0; else if (izer) jpas <= jpas+1;
    if (irst|izer) icnt <= 0; else if (ival) icnt <= icnt+1;
    // address, length, data and strobes are all registered once together
    jzer <= izerp;
    jval <= ival;
    jadr <= opkt? {icnt,ipas} : {ipas,icnt};
    jlen <= {size,c2bp};
    jdat <= idat;
  end

  // regular array in fifo out
  // word address and length are converted to bytes by appending w2bp zeros
  aifoNxM #(ND,MD,NKR,BM,NFLG,MFLG) bif (iclk,irst, irdy,jval,32'd0,jzer,{jadr,w2bp},{jlen,w2bp},jdat,
					 oclk,orst, ordy,oena,odat);

  // output channel select - 8 deep of 8 channels each - trick with 8x8
  // NOTE(review): sizem1 and opkt are also used on iclk above; presumably quasi-static -- confirm.
  reg [NC-1:0] ocnt;
  wire ozer = oena && (ocnt==sizem1);
  always @(posedge oclk) begin
    if (orst|ozer) ocnt <= 0; else if (oena) ocnt <= ocnt+1;
    if (orst) osel <= 0; else if (opkt) osel <= ocnt[NC-1:3];
  end

endmodule

// Back end corner-turn FIFO: a ctfifoaddr address generator in front of a
// dpram with a write-only v port and a read-only w port.
//   ctena, ctdual, ctsize   corner-turn controls handed to ctfifoaddr
module fifoNxMpkt8be (vclk,vrst, virdy,vival,vidat,
                      wclk,wrst, wordy,woena,wodat, ctena,ctdual,ctsize);
  parameter ND=8;       // write data width in bits
  parameter MD=8;       // read data width in bits
  parameter DP=0;       // depth of 2K rams
  parameter BM=4;       // address bits in comparison
  parameter NFLG=0;     // write side flags
  parameter MFLG=0;     // read side flags

  // num 2K rams
  localparam NR  = (DP>0)? DP :
                   (ND>288 || MD>288)? 16 :
                   (ND>144 || MD>144)?  8 :
                   (ND>72  || MD>72)?   4 :
                   (ND>36  || MD>36)?   2 : 1;
  // log2 of ram size
  localparam LRS = (NR>128)? 22 : (NR>64)? 21 : (NR>32)? 20 : (NR>16)? 19 : (NR>8)? 18 :
                   (NR>4)?   17 : (NR>2)?  16 : (NR>1)?  15 : 14;
  // log2 port width
  localparam LND = (ND>=512)? 9 : (ND>=256)? 8 : (ND>=128)? 7 : (ND>=64)? 6 : (ND>=32)? 5 :
                   (ND>=16)?  4 : (ND>=8)?   3 : (ND>=4)?   2 : (ND>=2)?  1 : 0;
  localparam LMD = (MD>=512)? 9 : (MD>=256)? 8 : (MD>=128)? 7 : (MD>=64)? 6 : (MD>=32)? 5 :
                   (MD>=16)?  4 : (MD>=8)?   3 : (MD>=4)?   2 : (MD>=2)?  1 : 0;
  localparam NA  = LRS-LND;
  localparam MA  = LRS-LMD;

  input           vclk;
  input           vrst;
  input           vival;
  input  [ND-1:0] vidat;
  output          virdy;

  input           wclk;
  input           wrst;
  input           woena;
  output [MD-1:0] wodat;
  output          wordy;

  input           ctena;
  input           ctdual;
  input  [1:0]    ctsize;

  wire L=0, H=1;

  // unused directions of each port
  wire          voena;
  wire          wival;
  wire          vordy;
  wire          wirdy;
  wire [ND-1:0] vodat;
  wire [MD-1:0] widat;
  assign voena = L;
  assign wival = L;

  // RAM port controls from the address generator
  wire          vena;
  wire          viena;
  wire          wena;
  wire          wiena;
  wire [NA-1:0] vcnt;
  wire [MA-1:0] wcnt;

  ctfifoaddr #(.NA(NA), .MA(MA), .BM(MA-4), .NW(256), .MW(256)) vwfa (
        .ctena(ctena), .ctdual(ctdual), .ctsize(ctsize),
        .vclk(vclk), .vrst(vrst), .virdy(virdy), .vival(vival),
        .vordy(vordy), .voena(voena), .vena(vena), .viena(viena), .vadr(vcnt),
        .wclk(wclk), .wrst(wrst), .wirdy(wirdy), .wival(wival),
        .wordy(wordy), .woena(woena), .wena(wena), .wiena(wiena), .wadr(wcnt));

  dpram #(NR*2,ND,MD,NFLG|`WONLY,MFLG|`RONLY) dps (
        vclk, vena, vcnt, viena, vidat, vodat,
        wclk, wena, wcnt, wiena, widat, wodat);

endmodule

// single port ROM 
// Single port ROM.
//   vclk, vce   clock and enable
//   vaddr       word address, NA = log2(NKB*1024*8/ND) bits
//   vodat       read data
// FILE is handed to the Altera dpramb instance; the non-Altera branch
// instantiates a RAMB16_S36_S36 primitive directly.
module sprom (vclk,vce, vaddr,vodat);
  parameter NKB=2;
  parameter ND=32;
  parameter FILE="none";
// included inside the module body; presumably supplies the log2() function used below
`include "../lib/functions.h"
  localparam NA=log2(NKB*1024*8/ND);
  input vclk,vce;
  input [NA-1:0] vaddr;
  output [ND-1:0] vodat;

  wire L=0,H=1;
`ifdef ALTERA
  // both ports read-only with write controls tied low; only port A data is used
  wire dummy1,dummy2,dummy3;
  dpramb #(NKB,ND,ND,`RONLY,`COMCLK|`RONLY,FILE) inst (	vclk,vce, vaddr,L, L,L, vodat,dummy1,
							vclk,vce, vaddr,L, L,L, dummy2,dummy3);
`else
  // write enables and set/reset tied low, data inputs left unconnected;
  // both ports share clock, enable and address, only DOA is used
  RAMB16_S36_S36 inst  (.CLKA(vclk),.ADDRA(vaddr),.ENA(vce),.WEA(L),.SSRA(L),.DIA(),.DIPA(),.DOA(vodat),.DOPA(),
                        .CLKB(vclk),.ADDRB(vaddr),.ENB(vce),.WEB(L),.SSRB(L),.DIB(),.DIPB(),.DOB(),.DOPB());
 // presumably the memory initialisation for "inst" -- TODO confirm against jvmlut.hx
 `include "../lib/jvmlut.hx"
`endif
endmodule

// single port RAM 
// Single port RAM: a dpram with only port A in use.
// Port B shares the clock (`COMCLK), has its enable and write enable tied
// low, and its address and data buses left as undriven placeholders.
module spram (vclk, vena, vaddr, viena, vidat, vodat);

  parameter NKB=2;      // kbytes of ram desired
  parameter NW=8;       // port width in bits
  parameter NFLG=0;     // port flags

  // enough address bits to pass through to dpram without computing here
  localparam LA=24;

  input           vclk;
  input           vena;
  input           viena;
  input  [LA-1:0] vaddr;
  input  [NW-1:0] vidat;
  output [NW-1:0] vodat;

  // idle second port
  wire            wclk;
  wire            wena;
  wire            wiena;
  wire [LA-1:0]   waddr;        // dummy bus
  wire [NW-1:0]   widat;        // dummy bus
  wire [NW-1:0]   wodat;        // dummy bus
  assign wclk  = vclk;
  assign wena  = 1'b0;
  assign wiena = 1'b0;

  dpram #(.NKB(NKB), .NW(NW), .MW(NW), .NFLG(NFLG), .MFLG(NFLG|`COMCLK)) dpr (
      .vclk(vclk), .vena(vena), .vaddr(vaddr), .viena(viena), .vidat(vidat), .vodat(vodat),
      .wclk(wclk), .wena(wena), .waddr(waddr), .wiena(wiena), .widat(widat), .wodat(wodat));

endmodule 

// simple dual port RAM with WONLY/RONLY port restrictions
//
// Wrapper around dpram in which port A (v*) only writes and port B (w*) only
// reads.  `WONLY is OR-ed into the port A flags and `RONLY into the port B
// flags before they are handed to dpram.
//
// Parameters:
//   NKB  - kbytes of ram desired
//   NW   - write port (A) width in bits
//   MW   - read port (B) width in bits, defaults to NW
//   NFLG - extra flags for port A
//   MFLG - extra flags for port B
//
// Ports:
//   vclk,vena,vaddr,viena,vidat - write port clock, enable, address, write enable, data
//   wclk,wena,waddr,wodat       - read port clock, enable, address, data
module sdpram (vclk,vena, vaddr, viena, vidat, 
               wclk,wena, waddr,        wodat);

  parameter NKB=2;	// kbytes of ram desired
  parameter NW=8;	// width for port A in bits
  parameter MW=NW;	// width for port B in bits
  parameter NFLG=0;	// flags for port A
  parameter MFLG=0;	// flags for port B

  localparam LA=24;	// enough address bits to pass through to dpram without computing here

  input vclk,vena,viena;
  input [LA-1:0] vaddr;
  input [NW-1:0] vidat;

  input wclk,wena;
  input [LA-1:0] waddr;
  output [MW-1:0] wodat;

  wire [NW-1:0] vodat;			// port A read data, unused
  wire [MW-1:0] widat={MW{1'b0}};	// port B write data, tied off (was previously undriven)
  wire wiena=1'b0;			// port B never writes

  dpram #(NKB,NW,MW,NFLG|`WONLY,MFLG|`RONLY) dpr (
     vclk, vena, vaddr, viena, vidat, vodat,
     wclk, wena, waddr, wiena, widat, wodat);

endmodule

// dual port RAM with full R/W from both ports
//
// Assembles a RAM from dpramb blocks: NRW blocks side by side to cover the port
// width and NRD blocks deep to cover the requested size.  Port A uses the v*
// signals and port B the w* signals; each port has its own clock, enable,
// address, write enable(s), write data and read data.
//
// Parameters:
//   NKB  - kbytes of ram desired, or when negative, -NKB is the number of RAM blocks
//   NW   - port A width in bits
//   MW   - port B width in bits
//   NFLG - port A flags (`BPAR, `BENA, `WONLY, `IREG, `OREG are examined here,
//          the rest is passed on to dpramb)
//   MFLG - port B flags (same set, with `RONLY instead of `WONLY)
//   FILE - declared but not referenced inside this module
module dpram (vclk, vena, vaddr, viena, vidat, vodat,
              wclk, wena, waddr, wiena, widat, wodat);

  parameter signed NKB=2;	// kbytes of ram desired or -NR actual rams
  parameter NW=8;		// width for port A in bits
  parameter MW=NW;		// width for port B in bits
  parameter NFLG=0;		// flags for port A
  parameter MFLG=0;		// flags for port B
  parameter FILE="none";

  // Port widths expressed in whole data bytes; bits above NWB*8 / MWB*8 are counted in NWP / MWP.
  localparam NWB=(NW>=512)? 64 : (NW>=256)? 32 : (NW>=128)? 16 : (NW>=64)? 8 : NW/8;	// port A width in whole bytes
  localparam MWB=(MW>=512)? 64 : (MW>=256)? 32 : (MW>=128)? 16 : (MW>=64)? 8 : MW/8;	// port B width in whole bytes
  localparam LW=(NW<MW)? NW:MW;		// lesser of port widths bits
  localparam GW=(NW<MW)? MW:NW;		// greater of port widths bits
  localparam LWB=(NW<MW)? NWB:MWB;	// lesser of port widths bytes
  localparam GWB=(NW<MW)? MWB:NWB;	// greater of port widths bytes
  localparam NWP=NW-NWB*8;		// port A bits left over above the whole bytes
  localparam MWP=MW-MWB*8;		// port B bits left over above the whole bytes

  localparam BPAR=((NFLG&`BPAR) || (MFLG&`BPAR))? 1:0;			// use parity in block mode at top of word
  localparam BENA=((NFLG&`BENA) || (MFLG&`BENA))? 1:0;			// either port requests byte write enables
  localparam SDPP=((NFLG&`WONLY) && (MFLG&`RONLY) && `RAMKB==4)? 1:0;	// possible simple dual port mode for Virtex6+
  localparam F2K=(NKB<=2 && GW<=(SDPP?36:18))? 1:0;			// fits in a 2K RAM18 block
  localparam SDP=(SDPP==0)? 0 : (F2K!=0)? 4 : 8;			// simple dual port mode

  localparam RKB=(`RAMKB==4 && F2K!=0)? 2 : `RAMKB;		// dont use 32K ram if 16K will do
  localparam NR=(NKB<0)? -NKB : (NKB<RKB)? 1 : NKB/RKB;		// # of total rams needed by size
  localparam PAR=(LW>8 && LW==LWB*9 && NR<=LWB)?1:0;		// data ports use parity bit
  localparam BPBL=(8+PAR);					// num usable bits per byte lane
  localparam MRW=(((SDP>0)? SDP*8:`RAMBW)/8)*BPBL;		// max supported ram port width
  localparam NRV=(GW+MRW-1)/MRW;				// min rams to support width
//  localparam NRX=(BENA)? 1 : (LW<NR)? LW : (NRV>NR)? NRV : NR;	// # rams wide to use
  localparam NRX=(LW<NR)? LW : (NRV>NR)? NRV : NR;		// # rams wide to use - need to address BENA
  localparam NRW=(PAR!=0 && NRX>LWB && LWB>0)? LWB : (NRX>0)? NRX : 1;	// # rams wide if using parity bits
  localparam NRD=(NR<NRW)? 1 : NR/NRW;				// # rams deep - has to mux output of NRD rams
  localparam LRD=(NRD>8)? 4 : (NRD>4)? 3 : (NRD>2)? 2 : (NRD>1)? 1 : 0;	// log2 of rams deep

  localparam ND=NW/NRW;			// bit width per ram
  localparam MD=MW/NRW;
  localparam LD=(ND<MD)? ND:MD;		// lesser of port widths per ram
  localparam NB=(ND>36)? 8 : (ND>18)? 4 : (ND>9)? 2 : (ND/8);	// data bytes per ram
  localparam MB=(MD>36)? 8 : (MD>18)? 4 : (MD>9)? 2 : (MD/8);	// data bytes per ram
  localparam LND=(ND>=64)? 6 : (ND>=32)? 5 : (ND>=16)? 4 : (ND>=8)? 3 : (ND>=4)? 2 : (ND>=2)? 1 : 0; // log2 port width per ram
  localparam LMD=(MD>=64)? 6 : (MD>=32)? 5 : (MD>=16)? 4 : (MD>=8)? 3 : (MD>=4)? 2 : (MD>=2)? 1 : 0;
  localparam LRS=(RKB==4)? 15 : (RKB==2)? 14 : 13;	// log2 ram size
  localparam NA=LRS-LND+LRD;		// port A address bits: per-ram address plus LRD depth-select bits
  localparam MA=LRS-LMD+LRD;		// port B address bits
  localparam LA=(NA<MA)? NA:MA;		// lesser of 2 address widths
  localparam NV=(NB==0)? ND:NB*8;	// padded data bits
  localparam MV=(MB==0)? MD:MB*8;
  localparam NP=(NB==0)? 1:NB;		// parity bits
  localparam MP=(MB==0)? 1:MB;
  localparam LS=(NB!=0 && MB!=0)? 8 : (LD>0)? LD : 1;	// slice size used when interleaving data across rams
  localparam NX=(ND>NV && BPAR==0)? 9 : LS;		// port A slice stride: 9 when a parity bit follows each byte
  localparam MX=(MD>MV && BPAR==0)? 9 : LS;		// port B slice stride
  localparam NIR=(NFLG&`IREG)?1:0;	// input register to aid timing - particularly with NRD>1
  localparam MIR=(MFLG&`IREG)?1:0;	// input register to aid timing - particularly with NRD>1
  localparam NOR=(NFLG&`OREG)?1:0;	// output register to aid timing - particularly with NRD>1
  localparam MOR=(MFLG&`OREG)?1:0;	// output register to aid timing - particularly with NRD>1
  localparam NWX=NRW*(NV+NP);		// declared port A bus width: data plus parity bits of all rams wide
  localparam MWX=NRW*(MV+MP);		// declared port B bus width
  localparam NBE=(NFLG&`BENA)? NW/8:1;	// port A write enable bits: one per byte with `BENA, else one
  localparam MBE=(MFLG&`BENA)? MW/8:1;	// port B write enable bits
  // `BENA is removed from the flags handed to dpramb when a ram carries at most one byte
  localparam NFLGY=((NFLG&`BENA)!=0 && NB<=1)? NFLG^`BENA : NFLG;
  localparam MFLGY=((MFLG&`BENA)!=0 && MB<=1)? MFLG^`BENA : MFLG;
  // `OREG is removed from the dpramb flags when rams are stacked deep; NOR/MOR go to the output mux instead
  localparam NFLGX=(NOR>0 && LRD>0)? NFLGY^`OREG : NFLGY;
  localparam MFLGX=(MOR>0 && LRD>0)? MFLGY^`OREG : MFLGY;
  
  input vclk,vena;
  input [NBE-1:0] viena;
  input [NA-1:0] vaddr;
  input [NWX-1:0] vidat;
  output [NWX-1:0] vodat;

  input wclk,wena;
  input [MBE-1:0] wiena;
  input [MA-1:0] waddr;
  input [MWX-1:0] widat;
  output [MWX-1:0] wodat;

  // handle extra input register
  // presumably ff passes its input straight through when the second parameter is 0 - confirm against ff
  wire [NA-1:0]  vaddx; ff #(NA, NIR) ffn1 (vaddx,vaddr,vclk);
  wire [NWX-1:0] vidax; ff #(NWX,NIR) ffn2 (vidax,vidat,vclk);
  wire [NBE-1:0] vienx; ff #(NBE,NIR) ffn3 (vienx,viena,vclk);

  wire [MA-1:0]  waddx; ff #(MA, MIR) ffm1 (waddx,waddr,wclk);
  wire [MWX-1:0] widax; ff #(MWX,MIR) ffm2 (widax,widat,wclk);
  wire [MBE-1:0] wienx; ff #(MBE,MIR) ffm3 (wienx,wiena,wclk);

  genvar i,j;
  generate
  // one iteration per ram across the width; j is the ram column
  for (j=0; j<NRW; j=j+1) begin:dpw
    // split rams by width prep
    wire [NV-1:0] vid,vod;
    wire [NP-1:0] vip,vop;
    wire [MV-1:0] wid,wod;
    wire [MP-1:0] wip,wop;
    // data slices are interleaved: slice i of ram j sits at slice position j+i*NRW of the bus
    for (i=0; i<NV/LS; i=i+1) begin:ndc
      assign vid[LS-1+i*LS:i*LS] = vidax[LS-1+(j+i*NRW)*NX:(j+i*NRW)*NX];
      assign vodat[LS-1+(j+i*NRW)*NX:(j+i*NRW)*NX] = vod[LS-1+i*LS:i*LS];
    end
    for (i=0; i<MV/LS; i=i+1) begin:mdc
      assign wid[LS-1+i*LS:i*LS] = widax[LS-1+(j+i*NRW)*MX:(j+i*NRW)*MX];
      assign wodat[LS-1+(j+i*NRW)*MX:(j+i*NRW)*MX] = wod[LS-1+i*LS:i*LS];
    end
    // parity bits: gathered at the top of the word with BPAR, otherwise the 9th bit of each slice
    if (ND>NV && MB>0) for (i=0; i<NB; i=i+1) begin:npc
      assign vip[i] = vidax[BPAR? j+NRW*(i+NV) : (j+i*NRW+1)*NX-1];
      assign vodat[BPAR? j+NRW*(i+NV) : (j+i*NRW+1)*NX-1] = vop[i];
    end
    if (MD>MV && NB>0) for (i=0; i<MB; i=i+1) begin:mpc
      assign wip[i] = widax[BPAR? j+NRW*(i+MV) : (j+i*NRW+1)*MX-1];
      assign wodat[BPAR? j+NRW*(i+MV) : (j+i*NRW+1)*MX-1] = wop[i];
    end
    // handle write byte enables - unary version has issues
    // with `BENA each lane picks the enable bit of the bus byte it carries, otherwise the single enable is used
    wire [NP-1:0] vjenx;
    wire [MP-1:0] wjenx;
    for (i=0; i<NP; i=i+1) begin:nbe
      if (NFLG&`BENA) assign vjenx[i] = vienx[((j+i*NRW)*NX)/8]; else assign vjenx[i] = vienx;
    end
    for (i=0; i<MP; i=i+1) begin:mbe
      if (MFLG&`BENA) assign wjenx[i] = wienx[((j+i*NRW)*MX)/8]; else assign wjenx[i] = wienx;
    end
    // split rams by depth
    if (LRD>0) begin:mdr
      wire [NRD*NV-1:0] vodx;
      wire [NRD*MV-1:0] wodx;
      wire [NRD*NP-1:0] vopx;
      wire [NRD*MP-1:0] wopx;
      for (i=0; i<NRD; i=i+1) begin:dpd
        // write enables reach ram i only when the top LRD address bits select it
        wire [NP-1:0] vjenxi = (vaddx[NA-1:NA-LRD]==i)?vjenx:0;
        wire [MP-1:0] wjenxi = (waddx[MA-1:MA-LRD]==i)?wjenx:0;
        dpramb #(RKB,NV,MV,NFLGX,MFLGX) inst (
          vclk,vena, vaddx[NA-LRD-1:0], vjenxi, vid,vip, vodx[NV-1+i*NV:i*NV],vopx[NP-1+i*NP:i*NP],
          wclk,wena, waddx[MA-LRD-1:0], wjenxi, wid,wip, wodx[MV-1+i*MV:i*MV],wopx[MP-1+i*MP:i*MP]);
      end
      // the top address bits are captured while the port is enabled and steer the read data mux
      reg [LRD-1:0] vsel; always @(posedge vclk) if (vena) vsel <= vaddx[NA-1:NA-LRD];
      reg [LRD-1:0] wsel; always @(posedge wclk) if (wena) wsel <= waddx[MA-1:MA-LRD];
      // presumably muxMxNp is an NRD-to-1 mux with an optional output register (3rd parameter) - confirm
      muxMxNp #(NRD,NV,NOR) mxnd (vod,vodx,vsel,vclk);
      muxMxNp #(NRD,NP,NOR) mxnp (vop,vopx,vsel,vclk);
      muxMxNp #(NRD,MV,MOR) mxmd (wod,wodx,wsel,wclk);
      muxMxNp #(NRD,MP,MOR) mxmp (wop,wopx,wsel,wclk);
    end
    // split rams by width 
    if (LRD<=0) begin:sdr
      dpramb #(RKB,NV,MV,NFLGX,MFLGX) inst (
        vclk,vena, vaddx,vjenx, vid,vip, vod,vop,
        wclk,wena, waddx,wjenx, wid,wip, wod,wop);
    end
  end
  // extra storage for the bits above the whole bytes when a per-ram width is too narrow to carry parity
  if (BPAR>0 && (NB==0 || MB==0)) begin:bpr	// 4 bit modes didn't support parity 
      wire vpi,vpo,wpi,wpo;
`ifdef MAYBE
      dpramb #(2,8,8,0,0) inst (
	vclk,vena, vaddr[NA-1:NA-LA],viena, vidat[7+NWB*8:NWB*8],vpi, vodat[7+NWB*8:NWB*8],vpo,
	wclk,wena, waddr[MA-1:MA-LA],wiena, widat[7+MWB*8:MWB*8],wpi, wodat[7+MWB*8:MWB*8],wpo);
`else
      // recursive dpram of NKB/8 kbytes holding the NWP / MWP leftover bits of each port
      dpram #(NKB/8,NWP,MWP,NFLG&`COMCLK,0) inst (
	vclk,vena, vaddr,viena, vidat[NW-1:NWB*8], vodat[NW-1:NWB*8],
	wclk,wena, waddr,wiena, widat[MW-1:MWB*8], wodat[MW-1:MWB*8]);
`endif
  end
  endgenerate

endmodule

// Single block RAM with two ports, mapped onto the primitive of the target device.
//
// Port A uses the v* signals, port B the w* signals.  Data and parity travel on
// separate buses (vid/vip, vod/vop, wid/wip, wod/wop).  The implementation is
// chosen at compile time:
//   USERAMBXX - RAMBXX model
//   ALTERA    - dpramb_alt (two one-way copies on Agilex when neither simple
//               dual port nor common clock mode applies)
//   VIRTEX2   - RAMB16_Sn_Sm primitive matching the NW x MW combination
//   SPARTAN6  - RAMB16BWER
//   NO32BRAM  - RAMB16
//   otherwise - RAMB18E1 or RAMB36E1
//
// Parameters:
//   RKB  - RAM depth in KB
//   ND   - port A width
//   MD   - port B width
//   NFLG - port A flags (`WONLY, `COMCLK, `OREG, `CINV, `EINV, `RFIRST, `WFIRST, `BENA)
//   MFLG - port B flags (same set, with `RONLY instead of `WONLY)
//   FILE - init file name, used by the ALTERA build only
module dpramb (vclk, vena, vaddr, viena, vid,vip, vod,vop,
               wclk, wena, waddr, wiena, wid,wip, wod,wop);

  parameter RKB=2;	// RAM depth in KB
  parameter ND=8;	// port A width
  parameter MD=8;	// port B width
  parameter NFLG=0;	// port A flags
  parameter MFLG=0;	// port B flags
  parameter FILE="none";

`ifdef AGILEX
  localparam AGX=1;
`else
  localparam AGX=0;
`endif
`include "../lib/functions.h"

  localparam LRS=(RKB>4)? 16 : (RKB>2)? 15 : (RKB>1)? 14 : 13;	// log2 ram size in bits
  localparam NB=(ND>36)? 8 : (ND>18)? 4 : (ND>9)? 2 : (ND>4)? 1 : 0;	// num bytes width A
  localparam MB=(MD>36)? 8 : (MD>18)? 4 : (MD>9)? 2 : (MD>4)? 1 : 0;	// num bytes width B
  localparam LND=(NB==8)? 6 : (NB==4)? 5 : (NB==2)? 4 : (NB==1)? 3 : (ND==4)? 2 : (ND==2)? 1 : 0; // log2 port width A
  localparam LMD=(MB==8)? 6 : (MB==4)? 5 : (MB==2)? 4 : (MB==1)? 3 : (MD==4)? 2 : (MD==2)? 1 : 0; // log2 port width B
  localparam NA=LRS-LND;		// num address bits A
  localparam MA=LRS-LMD;		// num address bits B
  localparam NW=(NB==0)? ND:NB*9;	// primitive width A, 9 bits per byte once parity is included
  localparam MW=(MB==0)? MD:MB*9;	// primitive width B
  localparam NV=(NB==0)? ND:NB*8;	// data bits A
  localparam MV=(MB==0)? MD:MB*8;	// data bits B
  localparam NP=(NB==0)? 1:NB;		// parity bits A
  localparam MP=(MB==0)? 1:MB;		// parity bits B
  localparam GW=(MW>NW)? MW:NW;		// greater of the two primitive widths
  localparam SDP=((NFLG&`WONLY) && (MFLG&`RONLY))? 1:0;	// use simple dual port mode
  localparam CCM=((NFLG&`COMCLK) || (MFLG&`COMCLK))? AGX:0;	// Common Clock Mode for Agilex
  localparam MREG=            (MFLG&`OREG)? 1:0;		// port B output register
  localparam NREG=SDP? MREG : (NFLG&`OREG)? 1:0;		// port A output register, follows port B in SDP mode
  localparam NCI=(NFLG&`CINV)? 1:0;	// invert port A clock
  localparam MCI=(MFLG&`CINV)? 1:0;	// invert port B clock
  localparam NEI=(NFLG&`EINV)? 1:0;	// invert port A enable
  localparam MEI=(MFLG&`EINV)? 1:0;	// invert port B enable
  localparam NWM=(NFLG&`RFIRST)?"READ_FIRST":(NFLG&`WFIRST)?"WRITE_FIRST":SDP?"WRITE_FIRST":"NO_CHANGE";
  localparam MWM=(MFLG&`RFIRST)?"READ_FIRST":(MFLG&`WFIRST)?"WRITE_FIRST":SDP?"WRITE_FIRST":"NO_CHANGE";
  
  input vclk,vena;
  input [NP-1:0] viena;
  input [NA-1:0] vaddr;
  input [ND-1:0] vid;
  input [NP-1:0] vip;
  output [ND-1:0] vod;
  output [NP-1:0] vop;

  input wclk,wena;
  input [MP-1:0] wiena;
  input [MA-1:0] waddr;
  input [MD-1:0] wid;
  input [MP-1:0] wip;
  output [MD-1:0] wod;
  output [MP-1:0] wop;

  wire H=1, L=0;

  // 8-bit write enable buses: the per-byte enables repeated with `BENA, otherwise bit 0 replicated
  wire [7:0] vjena = (NFLG&`BENA)? {8{viena}} : {8{viena[0]}};
  wire [7:0] wjena = (MFLG&`BENA)? {8{wiena}} : {8{wiena[0]}};

`ifdef NORAMREG

  // No output register is used from the primitive here: the raw outputs (vodb/wodb) are
  // registered in fabric and the registered copy is selected when NREG/MREG is set.
  wire [ND-1:0] vodb;
  wire [NP-1:0] vopb;
  reg [ND-1:0]  vodr; always @(posedge vclk) vodr <= vodb;
  reg [NP-1:0]  vopr; always @(posedge vclk) vopr <= vopb;

  wire [MD-1:0] wodb;
  wire [MP-1:0] wopb;
  reg [MD-1:0]  wodr; always @(posedge wclk) wodr <= wodb;
  reg [MP-1:0]  wopr; always @(posedge wclk) wopr <= wopb;

  assign vod = NREG? vodr : vodb;
  assign vop = NREG? vopr : vopb;
  assign wod = MREG? wodr : wodb;
  assign wop = MREG? wopr : wopb;

`endif

`ifdef USERAMBXX

  // the narrower port goes on the model's A side, so the ports are swapped unless MW>NW
  generate
  if (MW>NW) begin:ab
    RAMBXX #(NV,MV,LRS,NFLG,MFLG) ram (
    .CLKA(NCI?~vclk:vclk),.ENA(vena),.WEA_(vjena),.SSRA(L), .ADDRA(vaddr), .DIA_(vid),.DIPA_(vip), .DOA(vod),.DOPA(vop),
    .CLKB(MCI?~wclk:wclk),.ENB(wena),.WEB_(wjena),.SSRB(L), .ADDRB(waddr), .DIB_(wid),.DIPB_(wip), .DOB(wod),.DOPB(wop));
  end
  else begin:ba
    RAMBXX #(MV,NV,LRS,MFLG,NFLG) ram (
    .CLKB(NCI?~vclk:vclk),.ENB(vena),.WEB_(vjena),.SSRB(L), .ADDRB(vaddr), .DIB_(vid),.DIPB_(vip), .DOB(vod),.DOPB(vop),
    .CLKA(MCI?~wclk:wclk),.ENA(wena),.WEA_(wjena),.SSRA(L), .ADDRA(waddr), .DIA_(wid),.DIPA_(wip), .DOA(wod),.DOPA(wop));
  end
  endgenerate

`elsif ALTERA

 generate
 if (AGX && !SDP && !CCM) begin
  // use one dpram for A->B and one for B->A assuming common data is not required
  dpramb_alt #(RKB,ND,MD,NFLG|`WONLY,MFLG|`RONLY,FILE) a2b (vclk, vena, vaddr, viena, vid,vip ,,,  wclk, wena, waddr, L, L,L, wod,wop);
  dpramb_alt #(RKB,MD,ND,MFLG|`WONLY,NFLG|`RONLY,FILE) b2a (wclk, wena, waddr, wiena, wid,wip ,,,  vclk, vena, vaddr, L, L,L, vod,vop);
 end
 else begin
  dpramb_alt #(RKB,ND,MD,NFLG,MFLG,FILE) alt (vclk,vena, vaddr,viena, vid,vip ,vod,vop,  wclk,wena, waddr,wiena, wid,wip, wod,wop);
 end
 endgenerate
  
`elsif VIRTEX2

 // One primitive per supported NW x MW pair.  The primitives put the narrower port on
 // side A, so for NW>MW the v* signals connect to side B and the w* signals to side A;
 // clock/enable inversion always follows the port (N* flags with v*, M* flags with w*).
 generate 
  if (NW==1 && MW==1) begin:c1x1
    RAMB16_S1_S1 ram (
    .ADDRA(vaddr), .DIA(vid), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),
    .ADDRB(waddr), .DIB(wid), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb));
  end
  if (NW==1 && MW==2) begin:c1x2
    RAMB16_S1_S2 ram (
    .ADDRA(vaddr), .DIA(vid), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),
    .ADDRB(waddr), .DIB(wid), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb));
  end
  if (NW==2 && MW==1) begin:c2x1
    // swapped ports: port B (w*) drives side A with the M* invert flags, port A (v*) drives side B with the N* flags
    RAMB16_S1_S2 ram (
    .ADDRA(waddr), .DIA(wid), .CLKA(MCI?~wclk:wclk), .ENA(MEI?~wena:wena), .WEA(wiena), .SSRA(L), .DOA(wodb),
    .ADDRB(vaddr), .DIB(vid), .CLKB(NCI?~vclk:vclk), .ENB(NEI?~vena:vena), .WEB(viena), .SSRB(L), .DOB(vodb));
  end
  if (NW==2 && MW==2) begin:c2x2
    RAMB16_S2_S2 ram (
    .ADDRA(vaddr), .DIA(vid), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),
    .ADDRB(waddr), .DIB(wid), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb));
  end
  if (NW==4 && MW==4) begin:c4x4
    RAMB16_S4_S4 ram (
    .ADDRA(vaddr), .DIA(vid), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),
    .ADDRB(waddr), .DIB(wid), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb));
  end
  // NOTE(review): the 4xN cases connect DIPB but not DOPB, so wopb stays undriven there - confirm intended
  if (NW==4 && MW==9) begin:c4x9
    RAMB16_S4_S9 ram (
    .ADDRA(vaddr), .DIA(vid), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),
    .ADDRB(waddr), .DIB(wid), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DIPB(wip));
  end
  if (NW==4 && MW==18) begin:c4x18
    RAMB16_S4_S18 ram (
    .ADDRA(vaddr), .DIA(vid), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),
    .ADDRB(waddr), .DIB(wid), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DIPB(wip));
  end
  if (NW==4 && MW==36) begin:c4x36
    RAMB16_S4_S36 ram (
    .ADDRA(vaddr), .DIA(vid), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),
    .ADDRB(waddr), .DIB(wid), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DIPB(wip));
  end
  if (NW==9 && MW==9) begin:c9x9
    RAMB16_S9_S9 ram (
    .ADDRA(vaddr), .DIA(vid),.DIPA(vip), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),.DOPA(vopb),
    .ADDRB(waddr), .DIB(wid),.DIPB(wip), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DOPB(wopb));
  end
  if (NW==9 && MW==18) begin:c9x18
    RAMB16_S9_S18 ram (
    .ADDRA(vaddr), .DIA(vid),.DIPA(vip), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),.DOPA(vopb),
    .ADDRB(waddr), .DIB(wid),.DIPB(wip), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DOPB(wopb));
  end
  if (NW==9 && MW==36) begin:c9x36
    RAMB16_S9_S36 ram (
    .ADDRA(vaddr), .DIA(vid),.DIPA(vip), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),.DOPA(vopb),
    .ADDRB(waddr), .DIB(wid),.DIPB(wip), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DOPB(wopb));
  end
  if (NW==18 && MW==9) begin:c18x9
    RAMB16_S9_S18 ram (
    .ADDRA(waddr), .DIA(wid),.DIPA(wip), .CLKA(MCI?~wclk:wclk), .ENA(MEI?~wena:wena), .WEA(wiena), .SSRA(L), .DOA(wodb),.DOPA(wopb),
    .ADDRB(vaddr), .DIB(vid),.DIPB(vip), .CLKB(NCI?~vclk:vclk), .ENB(NEI?~vena:vena), .WEB(viena), .SSRB(L), .DOB(vodb),.DOPB(vopb));
  end
  if (NW==18 && MW==18) begin:c18x18
    RAMB16_S18_S18 ram (
    .ADDRA(vaddr), .DIA(vid),.DIPA(vip), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),.DOPA(vopb),
    .ADDRB(waddr), .DIB(wid),.DIPB(wip), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DOPB(wopb));
  end
  if (NW==18 && MW==36) begin:c18x36
    RAMB16_S18_S36 ram (
    .ADDRA(vaddr), .DIA(vid),.DIPA(vip), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),.DOPA(vopb),
    .ADDRB(waddr), .DIB(wid),.DIPB(wip), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DOPB(wopb));
  end
  if (NW==36 && MW==4) begin:c36x4
    RAMB16_S4_S36 ram (
    .ADDRA(waddr), .DIA(wid), .CLKA(MCI?~wclk:wclk), .ENA(MEI?~wena:wena), .WEA(wiena), .SSRA(L), .DOA(wodb),
    .ADDRB(vaddr), .DIB(vid), .CLKB(NCI?~vclk:vclk), .ENB(NEI?~vena:vena), .WEB(viena), .SSRB(L), .DOB(vodb),.DOPB(vopb));
  end
  if (NW==36 && MW==9) begin:c36x9
    RAMB16_S9_S36 ram (
    .ADDRA(waddr), .DIA(wid),.DIPA(wip), .CLKA(MCI?~wclk:wclk), .ENA(MEI?~wena:wena), .WEA(wiena), .SSRA(L), .DOA(wodb),.DOPA(wopb),
    .ADDRB(vaddr), .DIB(vid),.DIPB(vip), .CLKB(NCI?~vclk:vclk), .ENB(NEI?~vena:vena), .WEB(viena), .SSRB(L), .DOB(vodb),.DOPB(vopb));
  end
  if (NW==36 && MW==18) begin:c36x18
    RAMB16_S18_S36 ram (
    .ADDRA(waddr), .DIA(wid),.DIPA(wip), .CLKA(MCI?~wclk:wclk), .ENA(MEI?~wena:wena), .WEA(wiena), .SSRA(L), .DOA(wodb),.DOPA(wopb),
    .ADDRB(vaddr), .DIB(vid),.DIPB(vip), .CLKB(NCI?~vclk:vclk), .ENB(NEI?~vena:vena), .WEB(viena), .SSRB(L), .DOB(vodb),.DOPB(vopb));
  end
  if (NW==36 && MW==36) begin:c36x36
    RAMB16_S36_S36 ram (
    .ADDRA(vaddr), .DIA(vid),.DIPA(vip), .CLKA(NCI?~vclk:vclk), .ENA(NEI?~vena:vena), .WEA(viena), .SSRA(L), .DOA(vodb),.DOPA(vopb),
    .ADDRB(waddr), .DIB(wid),.DIPB(wip), .CLKB(MCI?~wclk:wclk), .ENB(MEI?~wena:wena), .WEB(wiena), .SSRB(L), .DOB(wodb),.DOPB(wopb));
  end
 endgenerate

`elsif SPARTAN6

  // word address padded with LND / LMD low zero bits to form the LRS-bit primitive address
  wire [LRS-1:0] vadrx = {vaddr,{LND{L}}};
  wire [LRS-1:0] wadrx = {waddr,{LMD{L}}};
  RAMB16BWER ram (
  .CLKA(NCI?~vclk:vclk),.ENA(NEI?~vena:vena),.WEA(vjena),.RSTA(L), .ADDRA(vadrx), .DIA(vid),.DIPA(vip), .DOA(vod),.DOPA(vop),
  .CLKB(MCI?~wclk:wclk),.ENB(MEI?~wena:wena),.WEB(wjena),.RSTB(L), .ADDRB(wadrx), .DIB(wid),.DIPB(wip), .DOB(wod),.DOPB(wop),
  .REGCEA(NREG),.REGCEB(MREG));
  defparam ram.DOA_REG = NREG;
  defparam ram.DATA_WIDTH_A = NW;
  defparam ram.DOB_REG = MREG; 
  defparam ram.DATA_WIDTH_B = MW;
  defparam ram.SIM_DEVICE = "SPARTAN6";

`elsif NO32BRAM

  // word address extended with a high 1 bit and LND / LMD low 1 bits
  wire [LRS:0] vadrx = {H,vaddr,{LND{H}}};
  wire [LRS:0] wadrx = {H,waddr,{LMD{H}}};
  RAMB16 #(.DOA_REG(NREG),.READ_WIDTH_A(NW),.WRITE_WIDTH_A(NW),.WRITE_MODE_A(NWM),
           .DOB_REG(MREG),.READ_WIDTH_B(MW),.WRITE_WIDTH_B(MW),.WRITE_MODE_B(MWM)) ram (
    .CLKA(NCI?~vclk:vclk),.ENA(NEI?~vena:vena),.WEA(vjena),.SSRA(L), .ADDRA(vadrx), .DIA(vid),.DIPA(vip), .DOA(vod),.DOPA(vop),
    .CLKB(MCI?~wclk:wclk),.ENB(MEI?~wena:wena),.WEB(wjena),.SSRB(L), .ADDRB(wadrx), .DIB(wid),.DIPB(wip), .DOB(wod),.DOPB(wop),
    .REGCEA(NREG),.REGCEB(MREG));

`else

  // word address extended with a high 1 bit and LND / LMD low 1 bits
  wire [LRS:0] vadrx = {H,vaddr,{LND{H}}};
  wire [LRS:0] wadrx = {H,waddr,{LMD{H}}};
 // Xilinx has special SDP mode for B->A so swap the ports since sdpram calls as A->B 
 // The SDP on Xilinx uses muxes both A&B input ports and both A&B output ports to get the wider access
 generate 
  // the 18K primitive is used when the size and the wider port allow it, else the 36K primitive
  if (LRS<15 && GW<=(SDP?36:18)) begin:r16k
    wire [1:0] xipa,xipb,xopa,xopb;
    wire [15:0] xida,xidb,xoda,xodb;
    // a full width SDP write is split: lower half on the A inputs, upper half on the B inputs
    assign xida = (SDP && NW==36)? vid[15:0]  : wid;
    assign xipa = (SDP && NW==36)? vip[1:0]   : wip;
    assign xidb = (SDP && NW==36)? vid[31:16] : vid;
    assign xipb = (SDP && NW==36)? vip[3:2]   : vip;
    RAMB18E1 #(.DOA_REG(MREG),.READ_WIDTH_A(MW),.WRITE_WIDTH_A(SDP?0:MW),.WRITE_MODE_A(MWM),
               .DOB_REG(NREG),.READ_WIDTH_B(SDP?0:NW),.WRITE_WIDTH_B(NW),.WRITE_MODE_B(NWM),.RAM_MODE(SDP?"SDP":"TDP")) ram (
    .CLKARDCLK(MCI?~wclk:wclk),.ENARDEN(MEI?~wena:wena),.WEA  (wjena), .ADDRARDADDR(wadrx), .DIADI(xida),.DIPADIP(xipa), .DOADO(xoda),.DOPADOP(xopa),
    .CLKBWRCLK(NCI?~vclk:vclk),.ENBWREN(NEI?~vena:vena),.WEBWE(vjena), .ADDRBWRADDR(vadrx), .DIBDI(xidb),.DIPBDIP(xipb), .DOBDO(xodb),.DOPBDOP(xopb),
    .REGCEAREGCE(MREG),.REGCEB(NREG),.RSTRAMARSTRAM(L),.RSTRAMB(L),.RSTREGARSTREG(L),.RSTREGB(L));
    // port B read data is the concatenation of both primitive outputs, port A reads the B side only
    assign wod = {xodb,xoda}; assign vod = xodb;
    assign wop = {xopb,xopa}; assign vop = xopb;
  end
  else begin:r32k
    wire [3:0] xipa,xipb,xopa,xopb;
    wire [31:0] xida,xidb,xoda,xodb;
    assign xida = (SDP && NW==72)? vid[31:0]  : wid;
    assign xipa = (SDP && NW==72)? vip[3:0]   : wip;
    assign xidb = (SDP && NW==72)? vid[63:32] : vid;
    assign xipb = (SDP && NW==72)? vip[7:4]   : vip;
    RAMB36E1 #(.DOA_REG(MREG),.READ_WIDTH_A(MW),.WRITE_WIDTH_A(SDP?0:MW),.WRITE_MODE_A(MWM),
               .DOB_REG(NREG),.READ_WIDTH_B(SDP?0:NW),.WRITE_WIDTH_B(NW),.WRITE_MODE_B(NWM),.RAM_MODE(SDP?"SDP":"TDP")) ram (
    .CLKARDCLK(MCI?~wclk:wclk),.ENARDEN(MEI?~wena:wena),.WEA  (wjena), .ADDRARDADDR(wadrx), .DIADI(xida),.DIPADIP(xipa), .DOADO(xoda),.DOPADOP(xopa),
    .CLKBWRCLK(NCI?~vclk:vclk),.ENBWREN(NEI?~vena:vena),.WEBWE(vjena), .ADDRBWRADDR(vadrx), .DIBDI(xidb),.DIPBDIP(xipb), .DOBDO(xodb),.DOPBDOP(xopb),
    .REGCEAREGCE(MREG),.REGCEB(NREG),.RSTRAMARSTRAM(L),.RSTRAMB(L),.RSTREGARSTREG(L),.RSTREGB(L));
    assign wod = {xodb,xoda}; assign vod = xodb;
    assign wop = {xopb,xopa}; assign vop = xopb;
  end
 endgenerate

`endif

endmodule

`ifdef ALTERA
// Altera/Intel implementation of one dual port block RAM, built on altera_syncram.
//
// Port A uses the v* signals and port B the w* signals, with the same port list
// and parameters as dpramb.  Two storage layouts are used:
//   RBS==8  - data only, 8-bit bytes (chosen when either width is below 9 bits
//             or an init file is given)
//   RBS==10 - each byte lane is stored as {1'b0, parity bit, 8 data bits}
// On Agilex in common clock mode with unequal port widths (NMX), the narrower
// port is widened to the wider width: writes are collected in a shift register
// and reads are muxed down using the registered low address bits.
//
// Parameters:
//   RKB  - RAM depth in KB
//   ND   - port A width
//   MD   - port B width
//   NFLG - port A flags
//   MFLG - port B flags
//   FILE - init file base name; "none" means no init file, otherwise ../lib/<FILE>.mif
module dpramb_alt (vclk, vena, vaddr, viena, vid,vip, vod,vop,
                   wclk, wena, waddr, wiena, wid,wip, wod,wop);

  parameter RKB=2;	// RAM depth in KB
  parameter ND=8;	// port A width
  parameter MD=8;	// port B width
  parameter NFLG=0;	// port A flags
  parameter MFLG=0;	// port B flags
  parameter FILE="none";

`ifdef AGILEX
  localparam AGX=1;
`else
  localparam AGX=0;
`endif
  localparam LRS=(RKB>4)? 16 : (RKB>2)? 15 : (RKB>1)? 14 : 13;	// log2 ram size in bits
  localparam NB=(ND>36)? 8 : (ND>18)? 4 : (ND>9)? 2 : (ND>4)? 1 : 0;	// num bytes width A
  localparam MB=(MD>36)? 8 : (MD>18)? 4 : (MD>9)? 2 : (MD>4)? 1 : 0;	// num bytes width B
  localparam LND=(NB==8)? 6 : (NB==4)? 5 : (NB==2)? 4 : (NB==1)? 3 : (ND==4)? 2 : (ND==2)? 1 : 0; // log2 port width A
  localparam LMD=(MB==8)? 6 : (MB==4)? 5 : (MB==2)? 4 : (MB==1)? 3 : (MD==4)? 2 : (MD==2)? 1 : 0; // log2 port width B
  localparam NA=LRS-LND;		// num address bits A
  localparam MA=LRS-LMD;		// num address bits B
  localparam NV=(NB==0)? ND:NB*8;	// data bits A
  localparam MV=(MB==0)? MD:MB*8;	// data bits B
  localparam NP=(NB==0)? 1:NB;		// parity bits A
  localparam MP=(MB==0)? 1:MB;		// parity bits B
  localparam SDP=((NFLG&`WONLY) && (MFLG&`RONLY))? 1:0;	// use simple dual port mode
  localparam CCM=((NFLG&`COMCLK) || (MFLG&`COMCLK))? AGX:0;	// Common Clock Mode for Agilex
  localparam MREG=            (MFLG&`OREG)? 1:0;		// port B output register
  localparam NREG=SDP? MREG : (NFLG&`OREG)? 1:0;		// port A output register, follows port B in SDP mode
  
  input vclk,vena;
  input [NP-1:0] viena;
  input [NA-1:0] vaddr;
  input [ND-1:0] vid;
  input [NP-1:0] vip;
  output [ND-1:0] vod;
  output [NP-1:0] vop;

  input wclk,wena;
  input [MP-1:0] wiena;
  input [MA-1:0] waddr;
  input [MD-1:0] wid;
  input [MP-1:0] wip;
  output [MD-1:0] wod;
  output [MP-1:0] wop;

  wire H=1, L=0;

  // 8-bit write enable buses: the per-byte enables repeated with `BENA, otherwise bit 0 replicated
  wire [7:0] vjena = (NFLG&`BENA)? {8{viena}} : {8{viena[0]}};
  wire [7:0] wjena = (MFLG&`BENA)? {8{wiena}} : {8{wiena[0]}};

  localparam EQW=(NV==MV)? 1:0;		// both ports have the same data width
  localparam NMX=(AGX && CCM && !EQW)? 1:0;	// for AGILEX need to implement NV!=MV with input/output mux/demux
  localparam XV =(MV>NV)? MV:NV;	// wider of the two data widths
  // NOTE(review): log2 is used here but ../lib/functions.h is not included in this module - confirm where it comes from
  localparam NXO=(NMX)? log2(XV/NV) : 0;	// low address bits of port A absorbed by the widening
  localparam MXO=(NMX)? log2(XV/MV) : 0;	// low address bits of port B absorbed by the widening
  localparam NX =(1<<NXO);		// port A elements per widened row
  localparam MX =(1<<MXO);		// port B elements per widened row
  localparam MFN=(FILE=="none")? "" : {"../lib/",FILE,".mif"};	// init file path, empty when none
  localparam RBS=((ND<9)||(MD<9)||(MFN!=""))? 8:10;	// ram byte size: 8 data only, 10 with parity lanes
  localparam ANV=(RBS==8)? NV:NB*10;	// port A width as stored
  localparam AMV=(RBS==8)? MV:MB*10;	// port B width as stored
  localparam ANVX=ANV*NX;		// port A width presented to altera_syncram
  localparam AMVX=AMV*MX;		// port B width presented to altera_syncram
  localparam ANVB=((NFLG&`BENA) && (ANVX>RBS))? ANVX/RBS : 1;	// port A byte enable width
  localparam AMVB=((MFLG&`BENA) && (AMVX>RBS))? AMVX/RBS : 1;	// port B byte enable width
  
  wire [ANVX-1:0] vidp,vodp,vidq;	// port A data to ram, data from ram, shift register contents
  wire [AMVX-1:0] widp,wodp,widq;  
  wire [NX*NB:0] dumn;			// sink for the unused top bit of each 10-bit lane
  wire [MX*MB:0] dumm;

  // byte enables follow the write enables with `BENA, otherwise they are held high
  wire [ANVB-1:0] vienab = (NFLG&`BENA)? vjena : H;
  wire [AMVB-1:0] wienab = (MFLG&`BENA)? wjena : H;

//  wire vienax = (NXO>0)? viena[0] && (vaddr[NXO-1:0]=={NXO{H}}) : (NFLG&`BENA)? (viena!=0) : viena[0];	// write on last element of row
//  wire wienax = (MXO>0)? wiena[0] && (waddr[MXO-1:0]=={MXO{H}}) : (MFLG&`BENA)? (wiena!=0) : wiena[0];

  // write strobe: any byte enable set with `BENA, otherwise bit 0 of the write enable
  wire vienax = (NFLG&`BENA)? (viena!=0) : viena[0];	// write on every element of row
  wire wienax = (MFLG&`BENA)? (wiena!=0) : wiena[0];

  genvar i;
  generate
  if (RBS==8) begin
   if (NXO>0) begin
    wire [NXO-1:0] vaddrx; 
    assign vidp = {vid[ANV-1:0],vidq[ANVX-1:ANV]};		// shift next element into full row - assumes linear address order
    // presumably ffe is an enabled register (out, in, clk, enable) - confirm against ffe
    ffe #(ANVX) vff (vidq,vidp,vclk,viena[0]);
    ffe #(NXO) vffa (vaddrx,vaddr[NXO-1:0],vclk,vena);
    // read side: the captured low address bits pick one element out of the row
    muxMxN #(NX,ANV) vmx (vod,vodp,vaddrx);
   end else begin
    assign vidp = vid;
    assign vod = vodp;
   end
   if (MXO>0) begin
    wire [MXO-1:0] waddrx; 
    assign widp = {wid[AMV-1:0],widq[AMVX-1:AMV]};
    ffe #(AMVX) wff (widq,widp,wclk,wiena[0]);
    ffe #(MXO) wffa (waddrx,waddr[MXO-1:0],wclk,wena);
    muxMxN #(MX,AMV) wmx (wod,wodp,waddrx);
   end else begin
    assign widp = wid;
    assign wod = wodp;
   end
  end
  else begin	 
    // 10-bit lanes: bit 9 is written as 0 and discarded on read, bit 8 is parity, bits 7:0 are data
    for(i=0; i<NB; i=i+1) begin: ALT_APARITY
      assign vidp[(i+1)*10-1:i*10] = {1'b0,vip[i],vid[(i+1)*8-1:i*8]};
      assign {dumn[i],vop[i],vod[(i+1)*8-1:i*8]} = vodp[(i+1)*10-1:i*10];
    end
    for(i=0; i<MB; i=i+1) begin: ALT_BPARITY      
      assign widp[(i+1)*10-1:i*10] = {1'b0,wip[i],wid[(i+1)*8-1:i*8]};
      assign {dumm[i],wop[i],wod[(i+1)*8-1:i*8]} = wodp[(i+1)*10-1:i*10];
    end
  end
  endgenerate

  // port B is clocked from CLOCK0 in common clock mode, otherwise from CLOCK1
  localparam CLKA="CLOCK0", CLKB=(CCM)?"CLOCK0":"CLOCK1", RDWM="DONT_CARE";

  altera_syncram #(
`ifdef ARRIA10
    .lpm_type("altsyncram"), .intended_device_family("Arria 10"),
    .operation_mode("BIDIR_DUAL_PORT"), .byte_size(RBS), .ram_block_type("M20K"),	// M9K,M144K,AUTO
`elsif AGILEX
    .lpm_type("altera_syncram"), .intended_device_family("Agilex 7"),
    .operation_mode(SDP? "DUAL_PORT" : "BIDIR_DUAL_PORT"), .byte_size(RBS), .ram_block_type("M20K"),		// M9K,M144K,AUTO
`else
    .lpm_type("altsyncram"), .intended_device_family("Stratix V"),
    .operation_mode("BIDIR_DUAL_PORT"), .byte_size(RBS), .ram_block_type("M20K"),	// M9K,M144K,AUTO
`endif
    .width_a(ANVX), .widthad_a(NA-NXO), .numwords_a(1<<(NA-NXO)), .width_byteena_a(ANVB), .outdata_reg_a(NREG?CLKA:"UNREGISTERED"),
    .width_b(AMVX), .widthad_b(MA-MXO), .numwords_b(1<<(MA-MXO)), .width_byteena_b(AMVB), .outdata_reg_b(MREG?CLKB:"UNREGISTERED"),
    .clock_enable_input_a("NORMAL"), .clock_enable_output_a(NREG?"NORMAL":"BYPASS"), .read_during_write_mode_port_a(RDWM),
    .clock_enable_input_b("NORMAL"), .clock_enable_output_b(MREG?"NORMAL":"BYPASS"), .read_during_write_mode_port_b(RDWM),
    .indata_reg_b(CLKB), .address_reg_b(CLKB), .rden_reg_b(CLKB), .byteena_reg_b(CLKB), .wrcontrol_wraddress_reg_b(CLKB),
    .outdata_aclr_a("NONE"), .outdata_aclr_b("NONE"), .outdata_sclr_a("NONE"), .outdata_sclr_b("NONE"), 
    .power_up_uninitialized("FALSE"), .init_file_layout("PORT_B"), .init_file(MFN)
  ) b9k (
    // the low NXO / MXO address bits are dropped here; they select the element within a widened row
    .aclr0(L), .rden_a(vena), .byteena_a(vienab), .wren_a(vienax), .address_a(vaddr[NA-1:NXO]), .data_a(vidp), .q_a(vodp),
    .aclr1(L), .rden_b(wena), .byteena_b(wienab), .wren_b(wienax), .address_b(waddr[MA-1:MXO]), .data_b(widp), .q_b(wodp),
    // clock1 is tied high in common clock mode since port B then runs from clock0
    .clock0(vclk), .clock1(CCM?H:wclk), .clocken0(H), .clocken1(H),
`ifdef AGILEX
    .clocken2(H), .clocken3(H), .address2_a(H), .address2_b(H), .sclr(L), 
`endif
    .addressstall_a(L), .addressstall_b(L), .eccstatus()
  );
endmodule
  
`endif

`ifdef VIRTEX8

// RAMB16_S36_S36 style wrapper around a RAMB36E1 primitive.
//
// Presents the RAMB16_Sn_Sm port list (clock, enable, single write enable,
// set/reset, address, data and parity in/out per port) on top of a RAMB36E1.
// The word address is extended with a high 1 bit and LND/LMD low 1 bits to form
// the primitive address, the single write enable is replicated per byte, and
// the output registers are not enabled (REGCE tied low).
// SSRA/SSRB are accepted for port compatibility but not connected.
//
// Parameters:
//   NA,ND,NP - port A address, data and parity widths
//   MA,MD,MP - port B address, data and parity widths
//   INIT_xx  - memory initialisation words; every declared INIT_xx parameter is
//              forwarded to the primitive so that none is silently ignored
module RAMB32_S36_S36 (CLKA,ENA,WEA,SSRA,ADDRA,DIA,DIPA,DOA,DOPA,
                       CLKB,ENB,WEB,SSRB,ADDRB,DIB,DIPB,DOB,DOPB);
  parameter NA=10,ND=32,NP=4;
  parameter MA=10,MD=32,MP=4;

  parameter [255:0] INIT_00=0, INIT_01=0, INIT_02=0, INIT_03=0, INIT_04=0, INIT_05=0, INIT_06=0, INIT_07=0, INIT_08=0, INIT_3F=0;

  localparam NB=(ND/8), NW=NB*9;	// port A bytes and primitive width including parity
  localparam MB=(MD/8), MW=MB*9;	// port B bytes and primitive width including parity
  localparam LRS=15, LND=5, LMD=5;	// log2 ram size and log2 port widths

  input CLKA,ENA,WEA,SSRA;
  input [NA-1:0] ADDRA;
  input [ND-1:0] DIA;
  input [NP-1:0] DIPA;
  output [ND-1:0] DOA;
  output [NP-1:0] DOPA;

  input CLKB,ENB,WEB,SSRB;
  input [MA-1:0] ADDRB;
  input [MD-1:0] DIB;
  input [MP-1:0] DIPB;
  output [MD-1:0] DOB;
  output [MP-1:0] DOPB;

  wire L=0,H=1;
  wire [LRS:0] vadrx = {H,ADDRA,{LND{H}}};
  wire [LRS:0] wadrx = {H,ADDRB,{LMD{H}}};

  RAMB36E1 #(.READ_WIDTH_A(NW),.WRITE_WIDTH_A(NW), .READ_WIDTH_B(MW),.WRITE_WIDTH_B(MW),
             .INIT_00(INIT_00),.INIT_01(INIT_01),.INIT_02(INIT_02),.INIT_03(INIT_03),.INIT_04(INIT_04),
             .INIT_05(INIT_05),.INIT_06(INIT_06),.INIT_07(INIT_07),.INIT_08(INIT_08),.INIT_3F(INIT_3F)) ram (
    .CLKARDCLK(CLKA),.ENARDEN(ENA),.WEA  ({NB{WEA}}), .ADDRARDADDR(vadrx), .DIADI(DIA),.DIPADIP(DIPA), .DOADO(DOA),.DOPADOP(DOPA),
    .CLKBWRCLK(CLKB),.ENBWREN(ENB),.WEBWE({MB{WEB}}), .ADDRBWRADDR(wadrx), .DIBDI(DIB),.DIPBDIP(DIPB), .DOBDO(DOB),.DOPBDOP(DOPB),
    .REGCEAREGCE(L),.REGCEB(L),.RSTRAMARSTRAM(L),.RSTRAMB(L),.RSTREGARSTREG(L),.RSTREGB(L));

endmodule

`endif


`ifndef verilator

// specialty for sin/cos lookup tables
//
// 4096 x 16 dual port RAM made of four RAMB16_S4_S4 primitives, one per nibble
// of the data word.  All four share the address, clock, enable and write enable
// of their port.  Port A uses the v* signals, port B the w* signals.
module dpram_4096x16x16 (vclk, vce, vaddr, viena, vidat, vodat,
                         wclk, wce, waddr, wiena, widat, wodat);

  // port A
  input         vclk, vce, viena;
  input  [11:0] vaddr;
  input  [15:0] vidat;
  output [15:0] vodat;

  // port B
  input         wclk, wce, wiena;
  input  [11:0] waddr;
  input  [15:0] widat;
  output [15:0] wodat;

  wire H=1, L=0;

  // data bits 3:0
  RAMB16_S4_S4 ram0 (
    .CLKA(vclk), .ENA(vce), .WEA(viena), .SSRA(L), .ADDRA(vaddr), .DIA(vidat[3:0]), .DOA(vodat[3:0]),
    .CLKB(wclk), .ENB(wce), .WEB(wiena), .SSRB(L), .ADDRB(waddr), .DIB(widat[3:0]), .DOB(wodat[3:0]));

  // data bits 7:4
  RAMB16_S4_S4 ram1 (
    .CLKA(vclk), .ENA(vce), .WEA(viena), .SSRA(L), .ADDRA(vaddr), .DIA(vidat[7:4]), .DOA(vodat[7:4]),
    .CLKB(wclk), .ENB(wce), .WEB(wiena), .SSRB(L), .ADDRB(waddr), .DIB(widat[7:4]), .DOB(wodat[7:4]));

  // data bits 11:8
  RAMB16_S4_S4 ram2 (
    .CLKA(vclk), .ENA(vce), .WEA(viena), .SSRA(L), .ADDRA(vaddr), .DIA(vidat[11:8]), .DOA(vodat[11:8]),
    .CLKB(wclk), .ENB(wce), .WEB(wiena), .SSRB(L), .ADDRB(waddr), .DIB(widat[11:8]), .DOB(wodat[11:8]));

  // data bits 15:12
  RAMB16_S4_S4 ram3 (
    .CLKA(vclk), .ENA(vce), .WEA(viena), .SSRA(L), .ADDRA(vaddr), .DIA(vidat[15:12]), .DOA(vodat[15:12]),
    .CLKB(wclk), .ENB(wce), .WEB(wiena), .SSRB(L), .ADDRB(waddr), .DIB(widat[15:12]), .DOB(wodat[15:12]));

  // presumably carries the table contents for ram0..ram3 - verify against sinlut.hx
 `include "../lib/sinlut.hx"

endmodule

// specialty for FFT twiddle tables
//
// 1024 x 32 dual port RAM made of two RAMB16_S18_S18 primitives, each holding
// one 16-bit half of the data word.  The parity pins of the primitives are left
// unconnected.  Port A uses the v* signals, port B the w* signals.
module dpram_1024x32x32 (vclk, vce, vaddr, viena, vidat, vodat,
                         wclk, wce, waddr, wiena, widat, wodat);

  // port A
  input         vclk, vce, viena;
  input  [9:0]  vaddr;
  input  [31:0] vidat;
  output [31:0] vodat;

  // port B
  input         wclk, wce, wiena;
  input  [9:0]  waddr;
  input  [31:0] widat;
  output [31:0] wodat;

  wire H=1, L=0;

  genvar i;
  generate
  // i is the low bit index of the half word handled by this primitive (0 and 16)
  for (i=0; i<32; i=i+16) begin:ram
    RAMB16_S18_S18 inst (
      .CLKA(vclk), .ENA(vce), .WEA(viena), .SSRA(L), .ADDRA(vaddr), .DIA(vidat[i +: 16]), .DOA(vodat[i +: 16]),
      .CLKB(wclk), .ENB(wce), .WEB(wiena), .SSRB(L), .ADDRB(waddr), .DIB(widat[i +: 16]), .DOB(wodat[i +: 16]));
  end
  endgenerate

endmodule

// PCI bidirectional channelized fifo (dir=1 vi-to-wo) (dir=0 wi-to-vo)
//
// Eight RAMB16_S9_S9 primitives form a 64-bit wide dual port store.  Each side
// addresses it with {channel, counter}: the 2-bit channel input selects the
// region and a 9-bit counter walks through it.  A side's counter advances on
// every cycle in which that side writes (ival) or reads (oena) and is cleared
// by the side's asynchronous reset.  Both RAM ports are permanently enabled.
// Parameter BM is declared but not referenced inside this module.
module bififo128k64x64c4K (vclk,vrst,vchan, vival,vidat, voena,vodat,
                           wclk,wrst,wchan, wival,widat, woena,wodat);
  parameter BM=3;

  input         vclk, vrst, vival, voena;
  input         wclk, wrst, wival, woena;
  input  [1:0]  vchan, wchan;
  input  [63:0] vidat, widat;
  output [63:0] vodat, wodat;

  wire H=1, L=0;

  reg [8:0] vcnt, wcnt;

  // v side position counter
  wire vval = vival | voena;
  always @ (posedge vclk or posedge vrst) begin
    if (vrst)
      vcnt <= 0;
    else if (vval)
      vcnt <= vcnt+1;
  end

  // w side position counter
  wire wval = woena | wival;
  always @ (posedge wclk or posedge wrst) begin
    if (wrst)
      wcnt <= 0;
    else if (wval)
      wcnt <= wcnt+1;
  end

  genvar i;
  generate
  // one primitive per data byte; i is the low bit index of that byte
  for (i=0; i<64; i=i+8) begin:ram
   RAMB16_S9_S9 inst (
    .CLKA(vclk), .ENA(H), .WEA(vival), .SSRA(L), .ADDRA({vchan,vcnt}), .DIPA(1'h0), .DIA(vidat[i +: 8]), .DOA(vodat[i +: 8]),
    .CLKB(wclk), .ENB(H), .WEB(wival), .SSRB(L), .ADDRB({wchan,wcnt}), .DIPB(1'h0), .DIB(widat[i +: 8]), .DOB(wodat[i +: 8])
   );
  end
  endgenerate

endmodule


// PCI channelized fifo with prefetch
//
// Wraps bififo128k64x64c4K and adds, on the V side only:
//   - 32-to-64 bit packing of write data when wide=0
//   - a small prefetch stage (srl16xN) on the read data, primed after reset
//     when usepre is set
// The W side is passed straight through to the inner fifo.
//
//   vchan/wchan - 4-bit channel numbers; only bits [1:0] reach the inner fifo,
//                 all four bits are compared to form wordy
//   vopre       - prefetch position adjust (see padj below)
//   usepre      - enables the priming sequence following vrst
//   wide        - 1: V side transfers are 64 bits, 0: 32 bits
//   endian      - declared but not referenced inside this module
//   virdy/vordy/wirdy - tied high; wordy is high when vchan != wchan
module bififo128k64x64pfc (vclk,vrst,vchan, virdy,vival,vidat, vordy,voena,vodat,
                           wclk,wrst,wchan, wirdy,wival,widat, wordy,woena,wodat,
                           vopre,usepre,wide,endian);

  input vrst, vclk, vival, voena, vopre, usepre, wide;
  input wrst, wclk, wival, woena;
  input [3:0] vchan, wchan;
  input [2:0] endian;
  output virdy, vordy, wirdy, wordy;

  input [63:0] vidat,widat;
  output [63:0] vodat,wodat;

  reg prime,primed,voenay,vopred,prep,vohigh,vosel,visel,vivalx;
  // read request as seen by the prefetch stage: priming or a user read
  wire voenaz = (prime || voena);
  // actual read of the inner fifo: every request when wide, every other one
  // (when vohigh is set) in 32-bit mode
  wire voenax = (prime || voena) && (wide || vohigh);
  wire [63:0] vodatx;
  reg [63:0] vidatx;
  reg [1:0] pcnt;

  // generate 3 prime cycles
  // NOTE(review): this comment says 3 prime cycles and the one below says 4;
  // the exact length of the prime pulse has not been verified here - confirm.
  always @ (posedge vclk or posedge vrst) begin
    // primed latches once pcnt reaches 1 and then ends the prime pulse
    if (vrst) primed <= 0;
    else primed <= (pcnt==1) || primed;
    // need 4 prime cycles triggered by reset
    if (vrst) prime <= 0;
    else prime <= (prep||prime) && !primed;
    // count of data in prefetch buffer
    // (up when a delayed request is not matched by a user read, down in the opposite case)
    if (vrst) pcnt <= 0;
    else if (voenay && !voena) pcnt <= pcnt+1;
    else if (!voenay && voena) pcnt <= pcnt-1;
    // in 32-bit mode vohigh toggles on every read request (half-word select)
    if (vrst) vohigh <= 0;
    else if (voenaz) vohigh <= vohigh ^ (!wide);
    // in 32-bit mode visel toggles on every write (marks the second half-word)
    if (vrst) visel <= 0;
    else if (vival) visel <= visel ^ (!wide);
  end
  always @ (posedge vclk) begin
    voenay <= voenaz;			// one-cycle delayed request, clocks the prefetch stage
    vopred <= vopre;
    vosel  <= vohigh;
    prep   <= vrst && usepre;		// registered copy of reset, arms priming when usepre
    // write the inner fifo on every wide write, or on the second 32-bit write
    vivalx <= vival && (wide || visel);
    // 32-bit mode: the new word enters the upper half, the previous upper half moves down
    if (vival) vidatx <= wide? vidat : {vidat[31:0],vidatx[63:32]};
  end

//  wire [1:0] padj = vopre? 1 : vopred? 2 : 3;
  // read tap into the prefetch stage = pcnt minus an adjust formed from vopre and its delayed copy
  wire [1:0] padj = {!vopre,!vopred};
  wire [1:0] addr = pcnt-padj;

  assign virdy = 1;
  assign vordy = 1;
  assign wirdy = 1;
  // wordy is registered on wclk from a compare against vchan
  // NOTE(review): vchan is presumably driven from the vclk domain and is used here unsynchronized - confirm
  reg wordy;
  always @(posedge wclk) begin
    wordy <= (vchan!=wchan);
  end

  bififo128k64x64c4K ff (vclk,vrst,vchan[1:0], vivalx,vidatx, voenax,vodatx,
                         wclk,wrst,wchan[1:0], wival,widat, woena,wodat);

  // prefetch stage: shifts on voenay; the lower 32 bits carry either half of
  // the fifo word as selected by vosel, the upper 32 bits always carry vodatx[63:32]
  srl16xN #(64) sr (vclk, voenay, {2'b00,addr}, {vodatx[63:32],vosel?vodatx[63:32]:vodatx[31:0]}, vodat);

endmodule

`endif

// 64-deep x N-wide single-port distributed RAM.
//   clk : write clock
//   we  : write enable, sampled on the rising edge of clk
//   a   : 6-bit address shared by the write and the read
//   d   : write data
//   q   : read data for the location currently addressed by a
module cram64xNs (q, d, a, clk,we);
  parameter N=1;
  output [N-1:0] q;
  input  [N-1:0] d;
  input  [5:0]   a;
  input          clk;
  input          we;

`ifdef VIVADO
  // explicit primitives: one RAM64X1S per data bit
  genvar b;
  generate
  for (b=0; b<N; b=b+1) begin:rb
    RAM64X1S cr (
      .WCLK(clk), .WE(we), .D(d[b]),
      .A0(a[0]), .A1(a[1]), .A2(a[2]), .A3(a[3]), .A4(a[4]), .A5(a[5]),
      .O(q[b]));
  end
  endgenerate
`else
  // inferred memory; the two synthesis directives below are kept verbatim
  //Synthesis attribute ram_style of ram is distributed
  reg [N-1:0] ram [63:0] /* synthesis syn_ramstyle="select_ram" */;
  always @(posedge clk) begin
    if (we) ram[a] <= d;
  end
  assign q = ram[a];
`endif

endmodule

// 16-deep x N-wide single-port distributed RAM.
//   clk : write clock
//   we  : write enable, sampled on the rising edge of clk
//   a   : 4-bit address shared by the write and the read
//   d   : write data
//   q   : read data for the location currently addressed by a
module cram16xNs (q, d, a, clk,we);
  parameter N=1;
  output [N-1:0] q;
  input  [N-1:0] d;
  input  [3:0]   a;
  input          clk;
  input          we;

`ifdef VIVADO
  // explicit primitives: one RAM32X1S per data bit, top address bit tied low
  genvar b;
  generate
  for (b=0; b<N; b=b+1) begin:rb
    RAM32X1S cr (
      .WCLK(clk), .WE(we), .D(d[b]),
      .A0(a[0]), .A1(a[1]), .A2(a[2]), .A3(a[3]), .A4(1'b0),
      .O(q[b]));
  end
  endgenerate
`else
  // inferred memory; the two synthesis directives below are kept verbatim
  //Synthesis attribute ram_style of ram is distributed
  reg [N-1:0] ram [15:0] /* synthesis syn_ramstyle="select_ram" */;
  always @(posedge clk) begin
    if (we) ram[a] <= d;
  end
  assign q = ram[a];
`endif

endmodule

// 16-deep x N-wide dual-port distributed RAM (one write port, two read ports).
//   clk : write clock
//   we  : write enable, sampled on the rising edge of clk
//   a   : 4-bit write address, also the address of read port q
//   d   : write data
//   q   : read data at address a
//   ar  : 4-bit address of the second read port
//   qr  : read data at address ar
module cram16xNd (q, d, a, clk,we, ar,qr);
  parameter N=1;
  output [N-1:0] q;
  output [N-1:0] qr;
  input  [N-1:0] d;
  input  [3:0]   a;
  input  [3:0]   ar;
  input          clk;
  input          we;

`ifdef VIVADO
  // explicit primitives: one RAM32X1D per data bit, top address bits tied low
  genvar b;
  generate
  for (b=0; b<N; b=b+1) begin:rb
    RAM32X1D cr (
      .WCLK(clk), .WE(we), .D(d[b]),
      .A0(a[0]), .A1(a[1]), .A2(a[2]), .A3(a[3]), .A4(1'b0),
      .SPO(q[b]),
      .DPRA0(ar[0]), .DPRA1(ar[1]), .DPRA2(ar[2]), .DPRA3(ar[3]), .DPRA4(1'b0),
      .DPO(qr[b]));
  end
  endgenerate
`else
  // inferred memory; the two synthesis directives below are kept verbatim
  //Synthesis attribute ram_style of ram is distributed
  reg [N-1:0] ram [15:0] /* synthesis syn_ramstyle="select_ram" */;
  always @(posedge clk) begin
    if (we) ram[a] <= d;
  end
  assign qr = ram[ar];
  assign q  = ram[a];
`endif

endmodule

// Simple dual-port view of cram16xNd: write at address a, read at address ar.
// The read-back at the write address is generated but not brought out.
//   clk,we : write clock and write enable
//   d,a    : write data and 4-bit write address
//   ar,qr  : 4-bit read address and read data
module cram16xNsd (d,a, clk,we, ar,qr);
  parameter N=1;
  output [N-1:0] qr;
  input  [N-1:0] d;
  input  [3:0]   a;
  input  [3:0]   ar;
  input          clk;
  input          we;

  wire [N-1:0] q;	// write-address read port, unused here

  cram16xNd #(.N(N)) cc (
    .q(q), .d(d), .a(a),
    .clk(clk), .we(we),
    .ar(ar), .qr(qr));

endmodule

// Simple dual-port 16xN RAM with a pipelined (registered) read output.
// Same as cram16xNsd except that the read data is captured in a register
// on the rising edge of clk before it is presented on qr.
//   clk,we : clock (write and output register) and write enable
//   d,a    : write data and 4-bit write address
//   ar,qr  : 4-bit read address and registered read data
module cram16xNsdp (d,a, clk,we, ar,qr);
  parameter N=1;
  output reg [N-1:0] qr;
  input  [N-1:0] d;
  input  [3:0]   a;
  input  [3:0]   ar;
  input          clk;
  input          we;

  wire [N-1:0] q;	// write-address read port, unused here
  wire [N-1:0] pr;	// unregistered read data at address ar

  cram16xNd #(.N(N)) cc (
    .q(q), .d(d), .a(a),
    .clk(clk), .we(we),
    .ar(ar), .qr(pr));

  always @(posedge clk) begin
    qr <= pr;
  end

endmodule

/* dual port with the second read port in its own clock domain - (for sparse we only)

   Two copies of the memory are kept:
     cc - written on clk, read at address a onto q (its ar read port is unused)
     dd - a shadow copy written on clkr, read at address ar onto qr

   Every write on clk captures the address/data in aw/dw and toggles cw.
   On the clkr side a difference between cw and cr raises re for one cycle,
   which replays the captured write into dd and then toggles cr to match.
   Only one pending write is held, hence the "sparse we" restriction: a
   second write arriving before the first has been replayed overwrites
   aw/dw and toggles cw back, so the first write never reaches dd.

   cw, cr and re are given power-up values of 0.  Without them the flags
   start as X in a 4-state simulation and, since each one only ever feeds
   back on itself, never resolve - the shadow copy would then never be
   written cleanly.

   NOTE(review): cw, aw and dw are sampled on clkr without synchronizer
   stages - confirm this is acceptable for the clock relationship in use.
*/
module cram16xNda (q, d, a, clk,we, ar,qr,clkr);
  parameter N=1;
  input clk,we,clkr;
  input [N-1:0] d;
  input [3:0] a,ar;
  output [N-1:0] q,qr;

  // primary copy in the clk domain; pr (read at ar) is not used
  wire [N-1:0] pr;
  cram16xNd #(N) cc (q,d,a, clk,we, ar,pr);

  reg [3:0] aw;		// address of the most recent write
  reg [N-1:0] dw;	// data of the most recent write
  reg cw=0;		// toggles on every write (clk domain)
  reg cr=0;		// toggles on every replay (clkr domain)
  reg re=0;		// one-cycle replay strobe (clkr domain)

  always @(posedge clk) if (we) aw <= a;
  always @(posedge clk) if (we) dw <= d;
  always @(posedge clk) if (we) cw <= !cw;

  // replay request pending while cw and cr differ; !re limits the strobe to one cycle
  always @(posedge clkr) re <= (cw^cr) && !re;
  always @(posedge clkr) if (re) cr <= !cr;

  // shadow copy in the clkr domain; qw (read at aw) is not used
  wire [N-1:0] qw;
  cram16xNd #(N) dd (qw,dw,aw, clkr,re, ar,qr);

endmodule

// Moves an N-bit bus from the iclk domain to the oclk domain through a
// 16-deep distributed RAM used as a circular buffer.
//   rst   - asynchronous reset of the write counter and of the read-side hold-off
//   iclk  - write clock; d is written every cycle at the write counter
//   oclk  - read clock; the RAM is read at the read counter
//   d     - input data (iclk domain)
//   q     - output data (oclk domain)
// Parameters:
//   N     - bus width
//   DELAY - the read counter is held at zero until the write counter has
//           reached DELAY after reset, which sets the initial write/read spacing
//   PIPE  - 1: q is the registered RAM output, otherwise q is the RAM output directly
module rephaser (rst, iclk, d, oclk, q);
  parameter N=1;
  parameter DELAY=8;
  parameter PIPE=1;
  input rst,iclk,oclk;
  input  [N-1:0] d;
  output [N-1:0] q;

  wire L=0,H=1;
  reg [3:0] icnt,ocnt;	// write and read pointers, both wrap at 16
  wire [N-1:0] e;	// RAM read data at ocnt
  reg [N-1:0] f;	// e registered on oclk
  wire irst = rst;
  reg orst;
  // write pointer free-runs on iclk once rst is released
  always @(posedge irst or posedge iclk) if (irst) icnt <= 0; else icnt <= icnt+1;
  // orst is set by rst and drops once icnt is no longer below DELAY; it then stays low
  always @(posedge irst or posedge iclk) if (irst) orst <= H; else orst <= orst && (icnt<DELAY);
  // read pointer is held at zero while orst is high, then free-runs on oclk
  // (orst is generated on iclk and used here as an asynchronous reset)
  always @(posedge orst or posedge oclk) if (orst) ocnt <= 0; else ocnt <= ocnt+1;
  always @(posedge oclk) f <= e;
  assign q = (PIPE==1)? f : e;

`ifdef VIVADO
  // explicit primitives: one RAM32X1D per bit, written every iclk (WE tied high);
  // only the dual-port read output is used
  genvar i;
  generate
  for (i=0; i<N; i=i+1) begin:rb
    RAM32X1D cr (.WCLK(iclk),.WE(H),.D(d[i]),.A0(icnt[0]),.A1(icnt[1]),.A2(icnt[2]),.A3(icnt[3]),.A4(1'b0),
                 .DPRA0(ocnt[0]),.DPRA1(ocnt[1]),.DPRA2(ocnt[2]),.DPRA3(ocnt[3]),.DPRA4(1'b0),.DPO(e[i]));
  end
  endgenerate
`else
  //Synthesis attribute ram_style of ram is distributed
  reg [N-1:0] ram [15:0] /* synthesis syn_ramstyle="select_ram" */;
  always @(posedge iclk) ram[icnt] <= d;
  assign e = ram[ocnt];   
`endif

endmodule


// FIFO for commands
//
// 36-bit wide command queue between a write side (v) and a read side (w),
// backed by an sdpram instance and two 9-bit pointers.
//   rst    - synchronous clear of both pointers (sampled on each side's clock)
//   stats  - {wcnt[7:0], vcnt[7:0], 4'hA, 3'b0, fill[8:0]}
//   vclk/vena/vdat - write clock, write strobe and data
//   vrdy   - registered on vclk; low while the top four bits of the fill level are all ones
//   wclk/wena      - read clock and read strobe (advances the read pointer)
//   wdat   - read data from the RAM at the read pointer
//   wrdy   - registered on wclk; high when the fill level is non-zero and wena is low
// Parameter INDEX: when negative every write is accepted, otherwise only
// writes whose vdat[35:32] equals INDEX are stored.
module cmdfifo (rst,stats, vclk,vrdy,vena,vdat, wclk,wrdy,wena,wdat); 
  parameter signed INDEX=-1;

  input         rst;
  output [31:0] stats;
  output reg    vrdy,wrdy;

  input         vclk,vena;
  input  [35:0] vdat;

  input         wclk,wena;
  output [35:0] wdat;

  // write strobe after the optional INDEX filter
  wire push = vena && ( (INDEX<0) || (vdat[35:32]==INDEX) );

  reg  [8:0] vcnt;	// write pointer (vclk)
  reg  [8:0] wcnt;	// read pointer (wclk)
  reg  [8:0] vcnt_w;	// write pointer re-registered on wclk
  wire [8:0] fill = vcnt_w-wcnt;

  // ---- write side ----
  always @(posedge vclk) begin
    if (rst) vcnt <= 0;
    else if (push) vcnt <= vcnt+1;
  end
  always @(posedge vclk) begin
    vrdy <= (fill[8:5] != 4'hF);
  end

  // ---- read side ----
  always @(posedge wclk) begin
    if (rst) wcnt <= 0;
    else if (wena) wcnt <= wcnt+1;
  end
  always @(posedge wclk) begin
    vcnt_w <= vcnt;
  end
  always @(posedge wclk) begin
    wrdy <= (fill!=0) && !wena;
  end

  sdpram #(2,36,36) ram (vclk,1'b1,vcnt,push,vdat, wclk,1'b1,wcnt,wdat);

  assign stats = {wcnt[7:0],vcnt[7:0],4'hA,3'b0,fill};

endmodule

// 36-bit wide asynchronous FIFO built on the FIFO16 primitive in
// first-word-fall-through mode.
//   rst   - FIFO reset
//   vclk/vena/vdat - write clock, write enable and data (vdat[35:32] use the parity inputs)
//   vrdy  - high while the locally computed "almost full" flag is clear
//   wclk/wena/wdat - read clock, read enable and data
//   wrdy  - high while the primitive's EMPTY flag is clear
// Parameters:
//   AFULL  - fill level above which vrdy is removed
//   AEMPTY - declared but not referenced inside this module
// The primitive's own almost-full/almost-empty flags are not used; the
// fill level is derived from the write and read counts instead.
module xfifo36 (rst, vclk,vrdy,vena,vdat, wclk,wrdy,wena,wdat);
  parameter AFULL=480;
  parameter AEMPTY=16;

  localparam AFO=504;  // workaround for async flag problems - bad if this ever crosses
  localparam AEO=500;  // workaround for async flag problems - bad if this ever crosses

  input rst,vclk,vena,wclk,wena;
  input [35:0] vdat;
  output [35:0] wdat;
  output vrdy,wrdy;

  wire H=1,L=0;

  wire empty;
  wire [8:0] vcnt,wcnt;	// write count and read count reported by the primitive
  FIFO16 #(.DATA_WIDTH(36), .FIRST_WORD_FALL_THROUGH("TRUE"), .ALMOST_FULL_OFFSET(AFO), .ALMOST_EMPTY_OFFSET(AEO) ) 
  fifo (.RST(rst), .EMPTY(empty), .RDCOUNT(wcnt), .WRCOUNT(vcnt), .FULL(),
	.WRCLK(vclk), .WREN(vena), .DIP(vdat[35:32]), .DI(vdat[31:0]), 
	.RDCLK(wclk), .RDEN(wena), .DOP(wdat[35:32]), .DO(wdat[31:0]) );

  // fill level sampled on vclk
  // NOTE(review): wcnt presumably changes on wclk, so this subtraction can see
  // a transitional value; the two-sample agreement below looks like the filter for that - confirm
  reg [8:0] diff;
  reg afull1,afull2,afull;
  always @(posedge vclk) begin
    diff   <= (vcnt-wcnt);
    afull1 <= (diff>AFULL);
    afull2 <= afull1;
    afull  <= afull1 && afull2;	// asserted only when two consecutive samples agree
  end

  assign vrdy = !afull;
  assign wrdy = !empty;

endmodule

// data formats 0=SI 1=SB 2=SN 3=SP 4=SL 5=SX   8=CI 9=CB C=CL
        
// packed data FIFO with output reformat to wofmt=CI 
//
// Wraps fifoNxM and re-orders / clears bytes of each fifo output word on the
// read side so that the packed input format vifmt is expanded to the output
// word.  One byte-wide registered mux (muxMxNpec) drives each output byte;
// the per-byte select values are recomputed every enabled cycle from a
// running count.
//   vclk,vrst,virdy,vival,vidat - fifo write side, passed straight to fifoNxM
//   vifmt - input data format (only codes 0,1,8,9 are decoded here; anything
//           other than 0, 1 or 9 takes the default branch of the tables)
//   wofmt - output format; only bit 4 is used (double-wide select for NC=2)
//   wordy - output ready: fifo ready and the one-cycle prime has completed
//   woena - output read strobe
//   wodat - NC*M bit reformatted output
// Only NC==1 and NC==2 generate any selection logic.
module fifoNxMfmt (vclk,vrst, vifmt,virdy,vival,vidat, 
                   wclk,wrst, wofmt,wordy,woena,wodat);
  parameter N=64;	// input width
  parameter M=32;	// output width per channel (always 32)
  parameter DP=0;	// num DP rams
  parameter NC=1;	// number output channels
  parameter NFLG=0;	// N flags
  parameter MFLG=0;	// M flags
  input vrst, vclk, vival;
  input wrst, wclk, woena;
  output virdy, wordy;
  input [N-1:0] vidat;
  output [NC*M-1:0] wodat;
  input [3:0] vifmt;
  input [4:0] wofmt;

  wire H=1, L=0;
  wire xordy,xoena;		// ready / read strobe of the inner fifo
  wire [NC*M-1:0] xodat;	// raw fifo output word

  // the CENA flag is always added to the output-side flags of the inner fifo
  fifoNxM #(N,NC*M,DP,3,NFLG,MFLG|`CENA) ff (vclk,vrst, virdy,vival,vidat, wclk,wrst, xordy,xoena,xodat);

  reg [2:0] cnt;
  reg [3:0] clr;
  reg prime,next1,next2;
  assign wordy = xordy && !prime;			// output ready
  wire ena = (prime && xordy) || woena;			// count/selector enable
  wire dbl = wofmt[4];	// double wide with NC=2
  wire next;
  assign xoena = (prime && xordy) || (woena && next);	// fifo read signal

  // decoded input formats
  wire sb = (vifmt==1);
  wire si = (vifmt==0);
  wire cb = (vifmt==9);
  wire ci = (vifmt==8);	// decoded but not referenced below (handled by the default branches)
  
  always @(posedge wclk) begin
    // count is one cycle early (uses prime) to latch select in output cycle
    if (wrst) cnt <= 0; else if (ena) cnt <= cnt+1;
    // prime will be up for one cycle with xordy to pipeline the fifo output
    if (wrst) prime <= 1; else prime <= prime && !xordy;
    // which bytes to clear
    // (the four clr bits are applied per output byte by the muxes below)
    clr <= (vifmt==1)? 4'b1101 : (vifmt==0)? 4'b1100 : (vifmt==9)? 4'b0101 : 4'b0000;
    // next is when we need to read the next fifo entry 1=SB 0=SI 9=CB x=CI
    // next1 is used for NC=1 and for double-wide NC=2, next2 for single-wide NC=2
    if (ena) next1 <= sb? (cnt[1:0]==3) : si? cnt[0] :      cb? cnt[0]      : H;
    if (ena) next2 <= sb? (cnt[2:0]==7) : si? cnt[1:0]==3 : cb? cnt[1:0]==3 : cnt[0];
  end

  wire c0 = cnt[0];
  wire c1 = cnt[1];
  wire c2 = cnt[2];	// not referenced below
  reg [NC:0] sel [4*NC-1:0];	// the selects pick the fifo byte to present at the output

  generate
  if (NC==2) begin:NC2
    // two channels: 8 output bytes, each selecting one of the 8 fifo bytes
    // (3-bit selects).  Blank table entries fall through to the rightmost value.
    assign next = dbl? next1 : next2;
    always @(posedge wclk) begin
      if (ena) sel[7] <= dbl? (                               cb? {c0,H,H} : 7) : (                          cb? {c1,c0,H} : {c0,H,H});
      if (ena) sel[6] <= dbl? (                                              6) : (                                          {c0,H,L});
      if (ena) sel[5] <= dbl? (sb? {c1,c0,H} : si? {c0,H,H} : cb? {c0,H,L} : 5) : (sb? cnt : si? {c1,c0,H} : cb? {c1,c0,L} : {c0,L,H});
      if (ena) sel[4] <= dbl? (                si? {c0,H,L} :                4) : (          si? {c1,c0,L} :                 {c0,L,L});
      if (ena) sel[3] <= dbl? (                               cb? {c0,L,H} : 3) : (                          cb? {c1,c0,H} : {c0,H,H});
      if (ena) sel[2] <= dbl? (                                              2) : (                                          {c0,H,L});
      if (ena) sel[1] <= dbl? (sb? {c1,c0,L} : si? {c0,L,H} : cb? {c0,L,L} : 1) : (sb? cnt : si? {c1,c0,H} : cb? {c1,c0,L} : {c0,L,H});
      if (ena) sel[0] <= dbl? (                si? {c0,L,L} :                0) : (          si? {c1,c0,L} :                 {c0,L,L});
    end
    // the same four clr bits are applied to both 32-bit halves
    muxMxNpec #(8,8) m7 (wodat[63:56], xodat, sel[7],wclk,woena,clr[3]);
    muxMxNpec #(8,8) m6 (wodat[55:48], xodat, sel[6],wclk,woena,clr[2]);
    muxMxNpec #(8,8) m5 (wodat[47:40], xodat, sel[5],wclk,woena,clr[1]);
    muxMxNpec #(8,8) m4 (wodat[39:32], xodat, sel[4],wclk,woena,clr[0]);
    muxMxNpec #(8,8) m3 (wodat[31:24], xodat, sel[3],wclk,woena,clr[3]);
    muxMxNpec #(8,8) m2 (wodat[23:16], xodat, sel[2],wclk,woena,clr[2]);
    muxMxNpec #(8,8) m1 (wodat[15:08], xodat, sel[1],wclk,woena,clr[1]);
    muxMxNpec #(8,8) m0 (wodat[07:00], xodat, sel[0],wclk,woena,clr[0]);
  end
  if (NC==1) begin:NC1
    // one channel: 4 output bytes, each selecting one of the 4 fifo bytes
    // (2-bit selects; the 3-bit cnt assigned to sel[1] is truncated to its low two bits)
    assign next = next1;
    always @(posedge wclk) begin	
      if (ena) sel[3] <=                        cb? {c0,H} : 3;
      if (ena) sel[2] <=                                     2;
      if (ena) sel[1] <= sb? cnt : si? {c0,H} : cb? {c0,L} : 1;
      if (ena) sel[0] <=           si? {c0,L}              : 0;
    end
    muxMxNpec #(4,8) m3 (wodat[31:24], xodat, sel[3],wclk,woena,clr[3]);
    muxMxNpec #(4,8) m2 (wodat[23:16], xodat, sel[2],wclk,woena,clr[2]);
    muxMxNpec #(4,8) m1 (wodat[15:08], xodat, sel[1],wclk,woena,clr[1]);
    muxMxNpec #(4,8) m0 (wodat[07:00], xodat, sel[0],wclk,woena,clr[0]);
  end
  endgenerate

endmodule

// packed data FIFO with output reformat from wifmt=CI|CL
//
// Input-side counterpart of fifoNxMfmt: words arriving on the W side are
// packed into the format vofmt before being written into fifoNxM, which is
// read out on the V side.  Four registered lane muxes (mux4xNpe) each pick
// one lane of widat; their enables decide on which input cycle each lane of
// the fifo word is captured, and xival writes the completed word.
//   vclk,vrst,vordy,voena,vodat - fifo read side, passed straight to fifoNxM
//   vofmt - packed format to produce (drives every select/enable below)
//   wifmt - declared but not referenced inside this module
//   wirdy,wival,widat - input ready, strobe and data
// Only M==32 (8-bit lanes) and M==64 (16-bit lanes) generate any logic.
module fifoNxMfmti (vclk,vrst, vofmt,vordy,voena,vodat, 
                    wclk,wrst, wifmt,wirdy,wival,widat);
  parameter N=64;
  parameter M=32;
  parameter DP=2;
`ifdef verilator
  parameter BM=6;
`else
  parameter BM=3;
`endif
`ifdef MCS_FMTI
  parameter NFLG=`DBLBUF;
`else
  parameter NFLG=0;
`endif
  parameter MFLG=0;	// M flags
  input vrst, vclk, voena;
  input wrst, wclk, wival;
  output vordy, wirdy;
  output [N-1:0] vodat;
  input [M-1:0] widat;
  input [3:0] vofmt, wifmt;

  wire H=1, L=0;
  wire [M-1:0] xidat;	// packed word presented to the fifo
  reg xival;		// fifo write strobe, one cycle after the completing input word

  fifoNxM #(M,N,DP,BM,MFLG,NFLG) ff (wclk,wrst, wirdy,xival,xidat, vclk,vrst, vordy,voena,vodat);

  reg [1:0] cnt;	// counts accepted input words
  reg [7:0] sel;	// four 2-bit lane selects, one per mux

 generate

 if (M==32) begin:CI
  // 32-bit input, 8-bit lanes.  Format 1 completes a word every 4th input,
  // formats 0 and 9 every 2nd input, anything else on every input.
  always @(posedge wclk) begin // here to stop verilator overoptimization
    xival <= wival && ((vofmt==1)? (cnt==3) : (vofmt==0)? cnt[0] : (vofmt==9)? cnt[0] : H);
    if (wrst) cnt <= 0; else if (wival) cnt <= cnt+1;
  end
  // lane selects per format
  always @(posedge wclk) begin
    sel[7:6] <= (vofmt==1)? 1 : (vofmt==0)? 1 : (vofmt==9)? 3 : (vofmt==4)? 1 : 3;
    sel[5:4] <= (vofmt==1)? 1 : (vofmt==0)? 0 : (vofmt==9)? 1 : (vofmt==4)? 0 : 2;
    sel[3:2] <= (vofmt==1)? 1 : (vofmt==0)? 1 : (vofmt==9)? 3 : (vofmt==4)? 3 : 1;
    sel[1:0] <= (vofmt==1)? 1 : (vofmt==0)? 0 : (vofmt==9)? 1 : (vofmt==4)? 2 : 0;
  end
  // per-lane capture enables: which input word of the group loads each output lane
  wire ena3 = wival && ((vofmt==1)? (cnt==3) : (vofmt==0)?  cnt[0] : (vofmt==9)?  cnt[0] : H);
  wire ena2 = wival && ((vofmt==1)? (cnt==2) : (vofmt==0)?  cnt[0] : (vofmt==9)?  cnt[0] : H);
  wire ena1 = wival && ((vofmt==1)? (cnt==1) : (vofmt==0)? !cnt[0] : (vofmt==9)? !cnt[0] : H);
  wire ena0 = wival && ((vofmt==1)? (cnt==0) : (vofmt==0)? !cnt[0] : (vofmt==9)? !cnt[0] : H);

  mux4xNpe #(8) m3 (xidat[31:24], widat[7:0],widat[15:8],widat[23:16],widat[31:24], sel[7:6],wclk,ena3);
  mux4xNpe #(8) m2 (xidat[23:16], widat[7:0],widat[15:8],widat[23:16],widat[31:24], sel[5:4],wclk,ena2);
  mux4xNpe #(8) m1 (xidat[15:08], widat[7:0],widat[15:8],widat[23:16],widat[31:24], sel[3:2],wclk,ena1);
  mux4xNpe #(8) m0 (xidat[07:00], widat[7:0],widat[15:8],widat[23:16],widat[31:24], sel[1:0],wclk,ena0);
 end

 if (M==64) begin:CL
  // 64-bit input, 16-bit lanes.  Same structure as the M==32 case with the
  // format codes 1/0/9 replaced by 0/4/8.
  always @(posedge wclk) begin // here to stop verilator overoptimization
    xival <= wival && ((vofmt==0)? (cnt==3) : (vofmt==4)? cnt[0] : (vofmt==8)? cnt[0] : H);
    if (wrst) cnt <= 0; else if (wival) cnt <= cnt+1;
  end
  // NOTE(review): in each expression below the second (vofmt==4) test can never
  // be reached because vofmt==4 is already taken by the second term.  It looks
  // like a leftover from the M==32 table; the intended format code for that
  // branch is not evident from this file - confirm before changing.
  always @(posedge wclk) begin
    sel[7:6] <= (vofmt==0)? 1 : (vofmt==4)? 1 : (vofmt==8)? 3 : (vofmt==4)? 1 : 3;
    sel[5:4] <= (vofmt==0)? 1 : (vofmt==4)? 0 : (vofmt==8)? 1 : (vofmt==4)? 0 : 2;
    sel[3:2] <= (vofmt==0)? 1 : (vofmt==4)? 1 : (vofmt==8)? 3 : (vofmt==4)? 3 : 1;
    sel[1:0] <= (vofmt==0)? 1 : (vofmt==4)? 0 : (vofmt==8)? 1 : (vofmt==4)? 2 : 0;
  end
  wire ena3 = wival && ((vofmt==0)? (cnt==3) : (vofmt==4)?  cnt[0] : (vofmt==8)?  cnt[0] : H);
  wire ena2 = wival && ((vofmt==0)? (cnt==2) : (vofmt==4)?  cnt[0] : (vofmt==8)?  cnt[0] : H);
  wire ena1 = wival && ((vofmt==0)? (cnt==1) : (vofmt==4)? !cnt[0] : (vofmt==8)? !cnt[0] : H);
  wire ena0 = wival && ((vofmt==0)? (cnt==0) : (vofmt==4)? !cnt[0] : (vofmt==8)? !cnt[0] : H);

  mux4xNpe #(16) m3 (xidat[63:48], widat[15:0],widat[31:16],widat[47:32],widat[63:48], sel[7:6],wclk,ena3);
  mux4xNpe #(16) m2 (xidat[47:32], widat[15:0],widat[31:16],widat[47:32],widat[63:48], sel[5:4],wclk,ena2);
  mux4xNpe #(16) m1 (xidat[31:16], widat[15:0],widat[31:16],widat[47:32],widat[63:48], sel[3:2],wclk,ena1);
  mux4xNpe #(16) m0 (xidat[15:00], widat[15:0],widat[31:16],widat[47:32],widat[63:48], sel[1:0],wclk,ena0);
 end

  endgenerate

endmodule


//  pack bits into a packed data FIFO 
//
// Each input sample carries `bits` valid bits (presumably in the low bits of
// idat - confirm against callers).  Samples are appended at a running bit
// offset; when the offset carries out of the word, oena is raised and odat
// holds a completed M-bit word.
//   clk,rst - clock and synchronous reset of the bit offset
//   bits    - number of bits contributed by each input sample
//   iena    - input sample strobe
//   idat    - input sample
//   oena    - output word strobe (combinational: iena and offset carry)
//   odat    - output word (combinational mix of idat and the stored bits)
module bitpacker (clk,rst,bits, iena,idat, oena,odat);
  parameter M=8;	// in/out bus width 
  localparam LM=(M<=8)? 3 : (M<=16)? 4 : (M<=32)? 5 : 6;	// log2 of the bus width
  input clk,rst;
  input[5:0] bits;
  input iena;
  input [M-1:0] idat;
  output oena;
  output [M-1:0] odat;

  wire H=1, L=0;

  reg [LM-1:0] boff;			// bit offset at which the next sample is placed
  wire [LM:0] noff = bits+boff;		// offset after this sample, with carry in bit LM
  wire over = noff[LM];			// this sample reaches or passes the end of the word
  wire oena = iena && over;
  reg [M-1:0] sdat;			// bits accumulated so far

  // advance the offset on every sample; the carry bit is dropped by the assignment
  always @(posedge clk) if (rst) boff <= 0; else if (iena) boff <= noff;

genvar i;
generate
  for (i=0; i<M; i=i+1) begin:biti
    // bits at or above the offset come from the shifted input, lower bits from storage
    assign odat[i] = (i>=boff)? idat[i-boff] : sdat[i];
    // a stored bit is refreshed when the word wraps, for the top bit, or when
    // it lies at or above the current offset
    // NOTE(review): for i<boff the index i-boff is negative; what gets stored
    // then depends on how the tool evaluates the out-of-range select - confirm
    wire atplay = over || (i==M-1) || (i>=boff);
    always @(posedge clk) if (iena && atplay) sdat[i] <= idat[i-boff];
  end
endgenerate

endmodule

//  unpack bits from a packed data FIFO 
//
// Extracts `bits`-wide samples from a stream of packed M-bit words.  A
// running bit offset selects the window; when it carries out of the word a
// new input word is requested.  One priming read is issued after reset once
// the source reports ready.
//   clk,rst - clock and synchronous reset
//   bits    - number of bits per output sample
//   irdy    - source has data (used only to trigger the priming read)
//   iena    - read strobe to the source (combinational)
//   idat    - packed word from the source
//   oena    - output sample strobe from the consumer
//   odat    - unpacked sample; bits at and above the delayed `bits` value are zero
module bitunpacker (clk,rst,bits, irdy,iena,idat, oena,odat);
  parameter M=8;	// in/out bus width 
  localparam LM=(M<=8)? 3 : (M<=16)? 4 : (M<=32)? 5 : 6;	// log2 of the bus width
  input clk,rst,irdy;
  input[5:0] bits;
  output iena;
  input [M-1:0] idat;
  input oena;
  output [M-1:0] odat;

  wire H=1, L=0;

  reg [LM-1:0] boff,boffd;		// bit offset of the next sample, and its one-cycle delay
  wire [LM:0] noff = bits+boff;		// offset after this sample, with carry in bit LM
  wire over = noff[LM];			// this sample consumes the rest of the current word
  reg overd,prime,primed;
  assign iena = (oena && over) || prime;
  reg [M-1:0] sdat;			// previously fetched word
  reg [5:0] bitsd;			// bits delayed one cycle
  wire [2*M-1:0] xsdat = {idat,sdat};	// window across the previous and the current word

  always @(posedge clk) begin
    if (rst) boff <= 0; else if (oena) boff <= noff;
    boffd <= boff;
    bitsd <= bits;
    overd <= iena;			// a read was issued last cycle
    if (overd) sdat <= idat;		// keep the word fetched by that read
    // prime pulses for one cycle, once, after reset when irdy is seen
    if (rst) primed <= 0; else primed <= prime || primed;
    if (rst) prime  <= 0; else prime <= !primed && !prime && irdy;
  end

genvar i;
generate
  for (i=0; i<M; i=i+1) begin:biti
    // pick bit i of the sample from the window at the delayed offset; mask unused upper bits
    assign odat[i] = (i>=bitsd)? L : xsdat[i+boffd];
  end
endgenerate

endmodule



// fifoNxM followed by a bitunpacker on its output: packed words written on
// the V side are read on the W side as samples of `bits` bits each.
//   vclk,vrst,virdy,vival,vidat - fifo write side
//   wclk,wrst - read-side clock and reset (shared by the fifo and the unpacker)
//   bits      - bits per output sample
//   wordy     - fifo output ready (2 bits wide here)
//   woena     - sample read strobe
//   wodat     - unpacked sample
module fifoNxMbi (vclk,vrst, virdy,vival,vidat, wclk,wrst, bits,wordy,woena,wodat);
  parameter N=64;	// input width
  parameter M=32;	// output max bits width 
  parameter DP=2;	// num DP rams
  parameter BM=3;
  parameter XFLG=0;

  input vrst, vclk, vival;
  input wrst, wclk, woena;
  output [1:0] virdy, wordy;
  input [N-1:0] vidat;
  output [M-1:0] wodat;
  input [5:0] bits;

  wire xoena;		// fifo read strobe generated by the unpacker
  wire [M-1:0] xodat;	// packed word from the fifo
  // XFLG is passed in the fifo's fifth parameter position, 0 in the sixth
  fifoNxM #(N,M,DP,BM,XFLG,0) ff (vclk,vrst, virdy,vival,vidat, wclk,wrst, wordy,xoena,xodat);
  // the unpacker's irdy input is one bit wide, so only wordy[0] is seen by it
  bitunpacker #(M) bup (wclk,wrst,bits, wordy,xoena,xodat, woena,wodat); 

endmodule

// bitpacker followed by fifoNxM: samples of `bits` bits each arriving on the
// W side are packed into M-bit words, written into the fifo and read out on
// the V side.
//   vclk,vrst,vordy,voena,vodat - fifo read side
//   wclk,wrst - input-side clock and reset (shared by the packer and the fifo)
//   bits      - bits per input sample
//   wirdy     - fifo input ready (2 bits wide here)
//   wival     - input sample strobe
//   widat     - input sample
module fifoNxMbo (vclk,vrst, vordy,voena,vodat, wclk,wrst, bits,wirdy,wival,widat);
  parameter N=64;
  parameter M=32;
  parameter DP=2;
  parameter BM=3;
  parameter XFLG=0;

  input vrst, vclk, voena;
  input wrst, wclk, wival;
  output [1:0] vordy, wirdy;
  output [N-1:0] vodat;
  input [M-1:0] widat;
  input [5:0] bits;

  wire xival;		// packed-word strobe from the packer
  wire [M-1:0] xidat;	// packed word from the packer
  bitpacker #(M) bp (wclk,wrst,bits, wival,widat, xival,xidat); 
  // 0 is passed in the fifo's fifth parameter position, XFLG in the sixth
  fifoNxM #(M,N,DP,BM,0,XFLG) ff (wclk,wrst, wirdy,xival,xidat, vclk,vrst, vordy,voena,vodat);

endmodule

/*
  Trace Debugger RAM block controller

  dbgbus
  dbgbusd dbgbusi
  dbgbuse active
  dbgbusf dbgwr

  mode 
  [1:0] 0=reset 1=oneshot 2=continuous 3=fullone
  [2] trig=gate
  [3] trig=invert 
  [7:4] trig bit 0-15

  bus busd buse busf ena diff dad dcnt ccnt 
  A
  A   A
  B   A    A
  B   B    A    A    t            0    1
  B   B    B    A    X   1        0    1
  C   B    B    B    X            1    1
  D   C    B    B    X            1    2
  D   D    C    B    X   1        1    3
  D   D    D    C    X   1        2    1
  D   D    D    D    X            3    1
      D    D    D    X

  Ports:
    clk,cs,wr,dati - control side: a write (cs&&wr) loads the 8-bit mode byte above
    reset          - registered on clk, high while mode[1:0]==0
    dbgclk,dbgbus  - trace clock and the bus being traced
    dbgwr          - write strobe for the trace RAM
    dbgcnt         - number of trace words written; bit DBGBD set means the buffer is full
    dbgbuso        - word to store in the trace RAM

  Except in fullone mode a word is stored only when the traced bus changed
  or when the run-length counter reaches 14; the stored word carries the low
  4 bits of that counter above the low 28 bits of the bus.
*/
module traceaddr (clk,cs,reset, wr,dati, dbgclk,dbgbus, dbgwr,dbgcnt,dbgbuso);
  parameter DBGBW=32;			// Trace width in bits
  parameter DBGBD=10;			// Trace depth is 2**DBGBD samples (def=1K)
  parameter SBW=32;			// not referenced inside this module
  input clk,cs,wr;
  input [7:0] dati;
  output reg reset;
  input dbgclk;
  input [DBGBW-1:0] dbgbus;
  output dbgwr;
  output reg [DBGBD:0] dbgcnt;
  output [DBGBW-1:0] dbgbuso;

  // ---- control register (clk domain) ----
  reg dbgsys,dbgsysd;
  reg [7:0] dbgctl;
  always @(posedge clk) begin
    dbgsys  <= cs && wr;
    dbgsysd <= dbgsys;
    // dati is captured one cycle after the cs&&wr cycle
    if (dbgsys) dbgctl <= dati[7:0];
    // simulation message only
    if (dbgsysd&&(dbgctl[1:0]!=0)) $write("TraceDbg configured with mode=%x gate=%x inv=%x bit=%d\n", dbgctl[1:0],dbgctl[2],dbgctl[3],dbgctl[7:4]);
    reset <= (dbgctl[1:0]==0);	// dbg reset
  end

  wire oneshot = (dbgctl[1:0]==1);	// dbg oneshot
  wire sample = (dbgctl[1:0]==2);	// dbg sample (decoded but not referenced below)
  wire onefull = (dbgctl[1:0]==3);	// dbg fullone
  wire gatem = dbgctl[2];		// dbg gate
  wire tinv = dbgctl[3];		// invert trigger

  // ---- capture logic (dbgclk domain) ----
  // NOTE(review): dbgctl-derived signals are used on dbgclk without
  // synchronizers (only reset and the trigger select are re-registered) - confirm
  reg rst,active,activated,gated,dbgen,dbgdiff,dbgcntx,addcnt;
  reg [3:0] dbgsel;
  reg [DBGBW-1:0] dbgbusd,dbgbuse,dbgbusf;	// three-stage delay of the traced bus
  reg [16:0] cmpcnt;				// samples seen since the last stored word
  wire dbgbusi = dbgbusd[dbgsel];		// trigger bit picked from the first delay stage
  wire done = ((oneshot|onefull) && dbgcnt[DBGBD]);	// buffer full in a one-pass mode
  assign dbgwr = dbgen && (dbgdiff || dbgcntx) && !done;

  // stored word: raw bus in fullone mode, the full counter when addcnt is set,
  // otherwise the low counter nibble above bus bits [27:0]
  // (the fixed [27:0]/[3:0] split produces a 32-bit word regardless of DBGBW)
  wire [DBGBW-1:0] dbgbuso = onefull? dbgbusf : addcnt? cmpcnt : {cmpcnt[3:0],dbgbusf[27:0]};

  always @(posedge dbgclk) begin
    rst     <= reset;
    dbgsel  <= dbgctl[7:4];
    dbgbusd <= dbgbus;
    dbgbuse <= dbgbusd;
    dbgbusf <= dbgbuse;
    dbgdiff <= (dbgbusd != dbgbuse);		// bus changed between consecutive samples
    dbgcntx <= (cmpcnt==14) || onefull;		// force a store: counter about to overflow its nibble, or fullone
    addcnt  <= !dbgdiff && cmpcnt[16];
    // activated remembers that a trigger has been seen since the last reset
    if (rst) activated <= 0; else activated <= active || activated;
    // simulation message only
    if (active && !activated) $write("TraceDbg triggered with trace=%x\n",dbgbuse);
    // capture enable: trigger level or already active, unless gated off
    if (rst) dbgen  <= 0; else dbgen  <= ((dbgbusi^tinv) || active) && !gated;
    // active starts on the first trigger and holds until done
    if (rst) active <= 0; else active <= ((dbgbusi^tinv) && !activated) || (active && !done);
    if (rst) dbgcnt <= 0; else if (dbgwr) dbgcnt <= dbgcnt+1;
    // run-length counter restarts at 1 after every stored word
    if (rst|dbgwr) cmpcnt <= 1; else if (dbgen) cmpcnt <= cmpcnt+1;
    // in gate mode capture is blocked while the (possibly inverted) trigger is low
    gated <= gatem && !(dbgbusi^tinv);
  end

endmodule

// Trace debugger: traceaddr controller plus the trace RAM and its host read-out.
//   clk,cs,wr,dati - host write: loads the trace mode byte and rewinds the read address
//   rd,dato        - host read: returns trace RAM data while the controller is
//                    in reset mode, otherwise the current trace word count
//   dbgclk,dbgbus  - trace clock and the bus being traced
// Parameters:
//   DBGBW - trace width in bits
//   DBGBD - log2 of the trace depth
//   SBW   - host bus width; when 8 the read address gets two extra bits and
//           the count is returned one byte at a time
module tracedbg (clk,cs, wr,dati, rd,dato, dbgclk,dbgbus);
  parameter DBGBW=32;			// Trace width in bits
  parameter DBGBD=10;			// Trace depth is 2**DBGBD samples (def=1K)
  parameter SBW=32;
  localparam NKR=(DBGBW/8)<<(DBGBD-10);	// Size of debug ram
  localparam AW=(SBW==8)? DBGBD+2 : DBGBD;
  input clk,cs,wr,rd;
  input [SBW-1:0] dati;
  output [SBW-1:0] dato;
  input dbgclk;
  input [DBGBW-1:0] dbgbus;

  wire reset,dbgwr;
  wire [SBW-1:0] data;		// trace RAM read data
  reg [AW:0] addr;		// host read address, auto-incrementing
  wire [DBGBD:0] dbgcnt;	// trace write count, also used as the RAM write address
  wire [DBGBW-1:0] dbgbuso;
  // rdd is cs&rd passed through delaypipe; presumably a 4-cycle delay - confirm
  wire rdd; delaypipe #(4) dpr (clk,cs&rd,rdd);
  // any host write rewinds the read address; each (delayed) read advances it
  always @(posedge clk) if (cs&wr) addr <= 0; else if (rdd) addr <= addr+1;
  traceaddr #(DBGBW,DBGBD,SBW) tra (clk,cs,reset, wr,dati, dbgclk,dbgbus, dbgwr,dbgcnt,dbgbuso);
  sdpram #(NKR,DBGBW,SBW) dbgr (dbgclk,1'b1, dbgcnt,dbgwr, dbgbuso,  clk,cs, addr,data);
  // in reset mode return RAM data; otherwise return the count (byte-sliced by addr[1:0] when SBW==8)
  assign dato = reset? data : (SBW==8)? dbgcnt>>{addr[1:0],3'b0} : dbgcnt;
endmodule

// Dual-port 512 x 32 status RAM (one RAMB16_S36_S36) whose last eight words
// (INIT_3F) are preloaded with the port parameters.
//   clkN,csN,addrN,wrN,datiN,datoN - clock, enable, byte address (bits [10:2]
//                                    select the word), write enable, write data
//                                    and read data of port N (N=1,2)
//   rd1,rd2 - declared but not referenced inside this module
// Parameters:
//   PORT - passed to clkf() from ../lib/functions.h to obtain a default for the last word
//   P1   - stored byte-reversed (see P1S below)
//   P2..P7 - stored as given
//   P8   - stored as given when > 0, otherwise replaced by clkf(PORT)
module portstatusblk (clk1,cs1,addr1,wr1,dati1,rd1,dato1, clk2,cs2,addr2,wr2,dati2,rd2,dato2);
  parameter PORT=0, P1=0, P2=0, P3=0, P4=0, P5=0, P6=0, P7=0, P8=0;
  localparam B0=(P1>>0)&255, B1=(P1>>8)&255, B2=(P1>>16)&255, B3=(P1>>24)&255;	// need to flip bytes around for strange char handling
  // reverse the byte order of P1 over its 2, 3 or 4 used bytes (decided by which upper bytes are zero)
  localparam P1S=(B2==0)? (B0<<8)|(B1<<0) : (B3==0)? (B0<<16)|(B1<<8)|(B2<<0) : (B0<<24)|(B1<<16)|(B2<<8)|(B3<<0);
  `include "../lib/functions.h"
  localparam CLKF=clkf(PORT);
  localparam P8F=(P8>0)? P8 : CLKF;

  input clk1,cs1,wr1,rd1,clk2,cs2,wr2,rd2;
  input [31:0] addr1,dati1,addr2,dati2;
  output [31:0] dato1,dato2;

  function [255:0] collect; // needed to make a constant of the input parameters
    input [31:0] param0,param1,param2,param3,param4,param5,param6,param7;
    // param0 is byte swapped since it comes in as a verilog string this way
    // param0 lands in the least significant 32 bits of the result
    collect = {param7,param6,param5,param4,param3,param2,param1,param0};
  endfunction

  // parity inputs tied off, parity outputs unused, synchronous set/reset held inactive
  RAMB16_S36_S36 ram ( .ADDRA(addr1[10:2]), .DIA(dati1),.DIPA(4'b0), .CLKA(clk1), .ENA(cs1), .WEA(wr1), .SSRA(1'b0), .DOA(dato1), .DOPA(),
                       .ADDRB(addr2[10:2]), .DIB(dati2),.DIPB(4'b0), .CLKB(clk2), .ENB(cs2), .WEB(wr2), .SSRB(1'b0), .DOB(dato2), .DOPB());
  defparam ram.INIT_3F = collect(P1S,P2,P3,P4,P5,P6,P7,P8F);

endmodule

// Single-port wrapper around portstatusblk: port 1 carries the host access,
// port 2 is tied off and its read data is discarded.
//   clk,cs,wr - clock, enable and write enable
//   addr,dati - byte address and write data
//   dato      - read data
module portstat (clk,cs,wr, addr,dati,dato);
  parameter PORT=0, P1=0, P2=0, P3=0, P4=0, P5=0, P6=0, P7=0, P8=0;
  input         clk;
  input         cs;
  input         wr;
  input  [31:0] addr;
  input  [31:0] dati;
  output [31:0] dato;

  wire [31:0] dummy;	// unused port-2 read data

  portstatusblk #(
    .PORT(PORT),
    .P1(P1), .P2(P2), .P3(P3), .P4(P4),
    .P5(P5), .P6(P6), .P7(P7), .P8(P8)
  ) sbdp (
    // port 1: host access
    .clk1(clk), .cs1(cs),   .addr1(addr),  .wr1(wr),   .dati1(dati),  .rd1(1'b0), .dato1(dato),
    // port 2: disabled
    .clk2(clk), .cs2(1'b0), .addr2(32'd0), .wr2(1'b0), .dati2(32'd0), .rd2(1'b0), .dato2(dummy));
endmodule

// Dual-port 512 x 32 status RAM (one RAMB16_S36_S36) whose last eight words
// (INIT_3F) are preloaded with PARAM0..PARAM7.
//   clkN,csN,addrN,wrN,datiN,datoN - clock, enable, byte address (bits [10:2]
//                                    select the word), write enable, write data
//                                    and read data of port N (N=1,2)
//   rd1,rd2 - declared but not referenced inside this module
// Parameters:
//   PARAM0 - stored byte-reversed (see P0 below)
//   PARAM1..PARAM7 - stored as given; PARAM7 defaults to `SYS_CLKF_
module dpstatusblk (clk1,cs1,addr1,wr1,dati1,rd1,dato1, clk2,cs2,addr2,wr2,dati2,rd2,dato2);
  parameter PARAM0=0, PARAM1=0, PARAM2=0, PARAM3=0, PARAM4=0, PARAM5=0, PARAM6=0, PARAM7=`SYS_CLKF_;
  localparam P0B0=(PARAM0>>0)&255;
  localparam P0B1=(PARAM0>>8)&255;
  localparam P0B2=(PARAM0>>16)&255;
  localparam P0B3=(PARAM0>>24)&255;	// need to flip bytes around for strange char handling
  // reverse the byte order of PARAM0 over its 2, 3 or 4 used bytes (decided by which upper bytes are zero)
  localparam P0=(P0B2==0)? (P0B0<<8)|(P0B1<<0) : (P0B3==0)? (P0B0<<16)|(P0B1<<8)|(P0B2<<0) : (P0B0<<24)|(P0B1<<16)|(P0B2<<8)|(P0B3<<0);

  input clk1,cs1,wr1,rd1,clk2,cs2,wr2,rd2;
  input [31:0] addr1,dati1,addr2,dati2;
  output [31:0] dato1,dato2;

  function [255:0] collect; // needed to make a constant of the input parameters
    input [31:0] param0,param1,param2,param3,param4,param5,param6,param7;
    // param0 is byte swapped since it comes in as a verilog string this way
    // param0 lands in the least significant 32 bits of the result
    collect = {param7,param6,param5,param4,param3,param2,param1,param0};
  endfunction

  // disabled write-protect of the parameter words: with it commented out,
  // writes through port 1 can overwrite the preloaded words
//  wire wrx = wr1 && (addr1[10:5]!=6'h3F);

  // parity inputs tied off, parity outputs unused, synchronous set/reset held inactive
  RAMB16_S36_S36 ram ( .ADDRA(addr1[10:2]), .DIA(dati1),.DIPA(4'b0), .CLKA(clk1), .ENA(cs1), .WEA(wr1), .SSRA(1'b0), .DOA(dato1), .DOPA(),
                       .ADDRB(addr2[10:2]), .DIB(dati2),.DIPB(4'b0), .CLKB(clk2), .ENB(cs2), .WEB(wr2), .SSRB(1'b0), .DOB(dato2), .DOPB());
  defparam ram.INIT_3F = collect(P0,PARAM1,PARAM2,PARAM3,PARAM4,PARAM5,PARAM6,PARAM7);

endmodule

// Single-port wrapper around dpstatusblk: port 1 carries the host access
// (including the rd strobe), port 2 is tied off and its read data is discarded.
//   clk,cs    - clock and enable
//   addr      - byte address
//   wr,dati   - write enable and write data
//   rd,dato   - read strobe and read data
module statusblk (clk, cs,addr, wr,dati, rd,dato);
  parameter PARAM0=0, PARAM1=0, PARAM2=0, PARAM3=0, PARAM4=0, PARAM5=0, PARAM6=0, PARAM7=`SYS_CLKF_;
  input         clk;
  input         cs;
  input         wr;
  input         rd;
  input  [31:0] addr;
  input  [31:0] dati;
  output [31:0] dato;

  wire [31:0] dummy;	// unused port-2 read data

  dpstatusblk #(
    .PARAM0(PARAM0), .PARAM1(PARAM1), .PARAM2(PARAM2), .PARAM3(PARAM3),
    .PARAM4(PARAM4), .PARAM5(PARAM5), .PARAM6(PARAM6), .PARAM7(PARAM7)
  ) sbdp (
    // port 1: host access
    .clk1(clk), .cs1(cs),   .addr1(addr),  .wr1(wr),   .dati1(dati),  .rd1(rd),   .dato1(dato),
    // port 2: disabled
    .clk2(clk), .cs2(1'b0), .addr2(32'd0), .wr2(1'b0), .dati2(32'd0), .rd2(1'b0), .dato2(dummy));
endmodule


// Status block with an attached trace debugger.
// Host accesses go to the dpstatusblk RAM, except that an access whose
// addr[11:0] is 12'hFFC also selects the trace debugger (one cycle later,
// via csd) and its read data replaces the RAM read data.
//   clk,cs,addr,wr,dati,rd,dato - host bus
//   dbgclk,dbgbus               - trace clock and the DBGBW-bit bus being traced
// Parameters:
//   PARAM0..PARAM7 - passed to dpstatusblk
//   DBGBW,DBGBD    - trace width and log2 trace depth, passed to tracedbg
//
// The dbgbus port is sized by DBGBW so that it matches the tracedbg port it
// feeds; with the default DBGBW=32 the interface is unchanged.
//
// NOTE(review): cs and wr reach the status RAM unqualified, so a write to
// 12'hFFC (word addr[10:2]=9'h1FF) is also stored in the RAM - confirm intended.
module statusblkdbg (clk, cs,addr, wr,dati, rd,dato, dbgclk,dbgbus);
  parameter PARAM0=0, PARAM1=0, PARAM2=0, PARAM3=0, PARAM4=0, PARAM5=0, PARAM6=0, PARAM7=`SYS_CLKF_, DBGBW=32,DBGBD=10;
  input clk,cs,wr,rd;
  input [31:0] addr,dati;
  output [31:0] dato;
  input dbgclk;
  input [DBGBW-1:0] dbgbus;

  wire L=0,H=1;
  wire  [31:0] dummy,datos,datod;	// unused port-2 data, status RAM data, debugger data

  // status RAM: port 1 is the host, port 2 is disabled
  dpstatusblk #(PARAM0, PARAM1, PARAM2, PARAM3, PARAM4, PARAM5, PARAM6, PARAM7) 
        sbdp (clk,cs,addr,wr,dati,L,datos, clk,L,32'd0,L,32'd0,L,dummy);

  // debugger select, registered one cycle after the addressed access
  reg csd; always @(posedge clk) csd <= cs && (addr[11:0]==12'hFFC);

  tracedbg #(DBGBW,DBGBD) dbg (clk,csd, wr,dati, rd,datod, dbgclk,dbgbus);

  assign dato = csd? datod : datos;

endmodule

// Single-port wrapper around dpstatusblk without a read strobe: port 1
// carries the host access, port 2 is tied off and its read data is discarded.
//   clk,cs,wr - clock, enable and write enable
//   addr,dati - byte address and write data
//   dato      - read data
module swrstat (clk,cs,wr, addr,dati,dato);
  parameter PARAM0=0, PARAM1=0, PARAM2=0, PARAM3=0, PARAM4=0, PARAM5=0, PARAM6=0, PARAM7=`SYS_CLKF_;
  input         clk;
  input         cs;
  input         wr;
  input  [31:0] addr;
  input  [31:0] dati;
  output [31:0] dato;

  wire [31:0] dummy;	// unused port-2 read data

  dpstatusblk #(
    .PARAM0(PARAM0), .PARAM1(PARAM1), .PARAM2(PARAM2), .PARAM3(PARAM3),
    .PARAM4(PARAM4), .PARAM5(PARAM5), .PARAM6(PARAM6), .PARAM7(PARAM7)
  ) sbdp (
    // port 1: host access
    .clk1(clk), .cs1(cs),   .addr1(addr),  .wr1(wr),   .dati1(dati),  .rd1(1'b0), .dato1(dato),
    // port 2: disabled
    .clk2(clk), .cs2(1'b0), .addr2(32'd0), .wr2(1'b0), .dati2(32'd0), .rd2(1'b0), .dato2(dummy));

endmodule

// Two independent 32-bit sequence generators sharing one read-only
// RAMB16_S36_S36.  Each port steps a free-running 9-bit address counter
// through the RAM; the RAM contents come from ../lib/scramble.hx.
//   clkN,rstN - clock and synchronous counter reset of port N (N=1,2)
//   outN      - RAM word at the current counter value, with the top FB bits
//               replaced by a fixed pattern
// Parameter FB: number of fixed bits at the top of each output word.
module scrambler32dp (clk1, rst1, out1,  clk2, rst2, out2);
  parameter FB=0;	// num fixed bits at top
  input clk1,clk2,rst1,rst2;
  output [31:0] out1,out2;
  wire [31:0] out1_,out2_;	// raw RAM read data
  wire H=1, L=0;
  reg [8:0] cnt1; always @(posedge clk1) if (rst1) cnt1 <= 0; else cnt1 <= cnt1+1;
  reg [8:0] cnt2; always @(posedge clk2) if (rst2) cnt2 <= 0; else cnt2 <= cnt2+1;
  // both ports always enabled, never written; data/parity inputs tied low
  RAMB16_S36_S36 inst (
   .CLKA(clk1), .ADDRA(cnt1), .DIA(L), .DIPA(L), .ENA(H), .WEA(L), .SSRA(L), .DOA(out1_), .DOPA(),
   .CLKB(clk2), .ADDRB(cnt2), .DIB(L), .DIPB(L), .ENB(H), .WEB(L), .SSRB(L), .DOB(out2_), .DOPB());
 `include "../lib/scramble.hx"
  // The concatenation is 8+(32-FB) bits wide and is truncated to 32 bits by
  // the assignment, so only the low FB bits of 8'b10101010 end up in the
  // output (FB=0 passes the RAM word unchanged, FB=8 uses the whole pattern).
  // For FB>8 the result is narrower than 32 bits and is zero-extended.
  assign out1 = {8'b10101010,out1_[31-FB:0]};
  assign out2 = {8'b10101010,out2_[31-FB:0]};

endmodule

