/**********************************************
 ICE Proprietary Software - do NOT disseminate
 **********************************************/

/*
  DMA Controller / IO State Machine 

  Jeff Schoen
  Innovative Computer Engineering, Inc.
  1/15/2003

  Input busses provide data to a core
  Output busses receive data from a core
  
  Memory nomenclature exception: input is from mem, output is to mem

  Direct Busses:    PM  MB                             PH   			- PH on PM only
                    HX  TX      HX  TX               TX  CX          HX
  Input Muxes:        \/____<_____\/___<____  ____>____\/_____>_____/
                      /\          /\        \/         /\          /\
  Module Cores: NIO|PR  HP      CA  CB     DRAM      HA  HB      TA  TB  
                      \/          \/       ----        \/          \/
  Output Muxes:     PH \        CX \___>___//\\___<____/ HX        / TX
                        \___>______________/  \_______________<___/


    Signals-32b:		    Signals-64b:										Valid:
	13w 	lock3			 5w	lock3   c2-		raddr	
	14w 	lock4	cp		 6w	lock4   cp-		iaddr				ri_act,ri_sel
	15w	ostart	c0		 7w	ostart	c0-		waddr				ro_act,ro_sel
	 0r	ostartd	c1	addr	 0r	ostartd	c1-	addr		odone	setomux  oena				
	 1r	istart	c2	ador	 1r	istart	c2-	ador	raddr   oclear	setimux  oenax  osel_x iseln_		obusa&b
	 2r	istartd		ago	 2r	istartd	cp	ago	iaddr                    iena   			obusx
	 3r				 3r	lock1	c0		waddr                    ienax  isel_x iseln		jena,ibus,isel
	 4r				 4w	lock2	c1	addr	        idone
	 5r				 5w	lock3	c2	adow	raddr   iclear
	 6r		cp		 6w	lock4	cp	ago	iaddr
	 7r		c0		 7w	ostart	c0		waddr
	 8w		c1	addr     0r             c1                      odone
	 9w		c2	adow                    c2                      oclear
	10w			ago
	11w	lock1
	12w	lock2
	13w	lock3
	14w	lock4	cp
	15w	ostart	c0

					NIO bus
        ostart
        oena 		
        obus 	istart	omuxsel
        obusx	iena	imuxsel		istartd
	obusm	ibusx			iseln_
					ienan,iseln,ibusn

  Memory Map   MB/PM

    PPC_DMA       0xFD001800   this 2K ram is shadowed in the base 4K for speed 

    PPC_DMA_TO    0xFD001E00   MC x 64  to RAM     MultiChannel/Tuners 1-64
    PPC_DMA_XI    0xFD001F00   SC x 8   fr RAM     Proc,PCI/None,Cor1,Cor2,Hyp1,Hyp2,Tun1,Tun2
    PPC_DMA_XO    0xFD001F40   SC x 6   to RAM     Proc,PCI/None,Cor1,Cor2,Hyp1,Hyp2
    PPC_DMA_TX    0xFD001F70   MC x 4   to RAM     Tun1A/B,Tun2A/B

  Notes:

  The HX outputs are the only direct routes that support concurrent HX2R 

  The SELECT bits are interpreted as:
   7	- 1=multichannel 0=direct
   6	- 1=auxiliary 0=RAM
   5:0	- channel number

*/

`include "vdefs.h"

`ifdef ICE_PM
  `define DMA_MODE 1
`elsif NOPM
  `define DMA_MODE 2
`else
  `define DMA_TX2TX 1
  `define DMA_MODE 0
`endif

`ifdef DTDM
  `define DMA_RAB 26
`elsif VIRTEX8
  `define DMA_RAB 31
`else
  `define DMA_RAB 28
`endif

`ifdef V6M
  `define DMA_TMC 11
`elsif K8M
  `define DMA_TMC 11
`elsif K8P
  `define DMA_TMC 11
`else
  `define DMA_TMC 8
`endif

`ifndef DMA_PIPE_MIBUS
  `define DMA_PIPE_MIBUS 0
`endif

`ifndef DMA_NR
  `define DMA_NR 1
`endif

module dmacontroller (
  bclk,baddr, brd,brdbus,bwr,bwrbus,			// PPC BRam IF
  sclk,scs,saddr, srd,srdbus,swr,swrbus,		// System Control
  aclk,ago,apd,acmd,bfull,qrdym,qselm,			// Memory Control
  irdym,ienam,ibusm, ordym,oenam,obusm,			// Memory Bus
  ioclk,rst,rsto,rxt, istat,ostat,oroute, 		// Block Status
  ienap,ibusp,oenap,obusp, addrp,sizep,busyp,okp,	// Processor bus
  ienah,ibush,oenah,obush, hpchn,hprst,			// Host/PCI bus
  ienaha,ibusha,iselha,oenaha,obusha,oselha, 		// Hyper/IOM bus A
  ienahb,ibushb,iselhb,oenahb,obushb,oselhb,		// Hyper/IOM bus B
  jenaca,ibusca,iselca,oenaca,obusca,oselca,    	// Core bus A
  jenacb,ibuscb,iselcb,oenacb,obuscb,oselcb,		// Core bus B
  jenata,ibusta,iselta,oenata,obusta,oselta, 		// Tuner/PM bus A
  jenatb,ibustb,iseltb,oenatb,obustb,oseltb,		// Tuner/PM bus B
`ifdef DMA_NIO
  ienan,ibusn,iseln,oenan,obusn,oseln, istatn,nioena,	// Network bus 
`endif
`ifdef DMA_NVM
  ienanv,ibusnv,iselnv,oenanv,obusnv,oselnv, iselhc,	// NVME bus 
`endif
`ifdef DMA_DIO
  ienad,iadrd,						// PCIe direct IO bus
`endif
`ifdef ICE_MB
  mcfga,mcfgb, mcena,mcenb,				// XBar signals
`else
  oenahc, hrios,hrdys, hold,hihold,hohold,			// PM signals
`endif
  test); 

  parameter IBW=64;	// Internal Bus Width

  localparam RAB=`DMA_RAB;	// Useable RAM address bits
  localparam TMC=`DMA_TMC;	// Tuner port channel bits
  localparam LBW=(IBW==256)?2:(IBW==128)?1:0;
  localparam MB=(`DMA_MODE==0)?1:0;	// Main Board | Crossbar only
  localparam PM=(`DMA_MODE >0)?1:0;	// Processor Module Modes
  localparam PC=(`DMA_MODE==2)?1:0;	// PAC|PIC NOPM MB/PM Hybrid

  input bclk;
  input brd,bwr;
  input [15:0] baddr;
  output [31:0] brdbus;
  input [31:0] bwrbus;

  input sclk,scs,srd,swr;
  input [31:0] saddr,swrbus;
  output [31:0] srdbus;

  input ioclk,rst;
  output rsto,rxt;
  input [15:0] istat,ostat;
  output [31:0] oroute;

  input aclk,bfull;
  output irdym,ordym;
  output ago,apd;
  output [35:0] acmd;

  input  okp;
  input  [1:0] sizep;
  input  [31:0] addrp; 
  input  [23:0] qrdym;
  output [15:0] qselm;

  input  [17:0] oselta,oseltb;
  input  [7:0] oselca,oselcb,oselha,oselhb;
  output [7:0] iselca,iselcb,iselha,iselhb,iselta,iseltb;
  input  [IBW-1:0] ibusm, obusp,obush, obusca,obuscb, obusha,obushb, obusta,obustb; 
  output [IBW-1:0] obusm, ibusp,ibush, ibusca,ibuscb, ibusha,ibushb, ibusta,ibustb; 
  output oenam, oenap,oenah, oenaca,oenacb, oenaha,oenahb, oenata,oenatb; 
  output ienam, ienap,ienah, jenaca,jenacb, ienaha,ienahb, jenata,jenatb; 
  output busyp, hprst;
  output [5:0] hpchn;
`ifdef DMA_DIO
  output ienad;
  output [31:0] iadrd;
`endif
`ifdef ICE_MB
  input [35:0] mcfga,mcfgb;
  output [7:0] mcena,mcenb;
`else
  output oenahc;
  output [2:0] hrios;
  input  [2:0] hrdys;
  input  [3:0] hold;
  output [3:0] hihold,hohold;
`endif
`ifdef DMA_NIO
  output ienan,oenan;
  output reg [7:0] iseln;
  output reg [IBW-1:0] ibusn;
  input [7:0] oseln;
  input [IBW-1:0] obusn;
  input [3:0] istatn;
  output [1:0] nioena;
`else
  wire ienan;
  wire [7:0] oseln=0;
  wire [3:0] istatn=0;
  wire [IBW-1:0] obusn;
`endif
`ifdef DMA_NVM
  output ienanv,oenanv;
  output [7:0] iselnv;
  output [IBW-1:0] ibusnv;
  input [15:0] oselnv;
  input [IBW-1:0] obusnv;
  output [15:0] iselhc;
`else
  wire [IBW-1:0] obusnv;
`endif
`ifdef TRACE_DBG_DMAC
  output [32:0] test;
`else
  output [7:0] test;
`endif

  wire H=1, L=0;
  wire [3:0] L4 = {4{L}};
  wire [5:0] L6 = {6{L}};

  //enable registers
  reg oenam,oenad,oenan,oenanv, oenap,oenah, oenaca,oenacb, oenaha,oenahb, oenata,oenatb, oenahc; 
  reg ienam,ienad,jenan,ienanv, ienap,ienah, ienaca,ienacb, ienaha,ienahb, ienata,ienatb; 
  reg oenamx,oenamd, oenama,oenamb, ienapd;
  reg [31:0] iadrd;
  reg [7:0] iselnv;

  // cycle counter
  // Free-running slot counter on ioclk.  The phase strobes below are decoded
  // from the low 3 bits of count when IBW>=64 and from all 4 bits otherwise.
  // Each strobe is registered, so it is high in the cycle after count holds
  // the decoded value.
  reg [3:0] count;
  reg ostars, ostart, ostartd;
  reg istart, istartd, istarte;
  reg setomux, setimux;
  reg lock1, lock2, lock3, lock4;
  always @ (posedge ioclk) begin
    // synchronously reset counter and first-stage strobes
    if (rst) begin
      count  <= 0;
      ostars <= 0;
      ostart <= 0;
      istart <= 0;
      lock1  <= 0;
    end
    else begin
      count <= count+1;
      if (IBW>=64) begin
        ostars <= (count[2:0]==5);
        ostart <= (count[2:0]==6);
        istart <= (count[2:0]==0);
        lock1  <= (count[2:0]==2);
      end
      else begin
        ostars <= (count[3:0]==13);
        ostart <= (count[3:0]==14);
        istart <= (count[3:0]==0);
        lock1  <= (count[3:0]==10);
      end
    end

    // lock pipeline: lock2 is also forced high while rst is asserted
    lock2 <= lock1 || rst;
    lock3 <= lock2;
    lock4 <= lock3;

    // delayed copies of the output start strobe
    ostartd <= ostart;
    setomux <= ostart;
    setimux <= setomux;

    // delayed copies of the input start strobe
    istartd <= istart;
    istarte <= istartd;
  end

  // route registers
  // rrst..rfdc are one-cycle decodes of a control-register write, re-timed
  // onto ioclk.  The register is chosen by saddr[4:2]:
  //   0 rrst  reset request          4 rcnt  HostPort channel write
  //   1 rflg  flags write            5 rspd  speedometer select write
  //   2 rset  route bits set         6 rnio/rone/ronu (sub-select saddr[1:0]=0/1/2)
  //   3 rclr  route bits clear       7 rfdc  framed-decimation write
  // rdcal/rdflg/rdspd/rdsta are the matching read-side selects.
  reg rrst, rflg, rset, rclr, rcnt, rone, ronu, rspd, rsto, rxt, rnio, rfdc, rdcal, rdspd, rdflg, rdsta;
  reg [31:0] route, croute, oroute, flags;
  reg [7:0] onden, ondup, ondok;	// On Demand mode
  reg [2:0] rstcnt;
  wire [7:0] calib;
  // write strobe: selected write with saddr[15]=1 and saddr[14]=0
  // NOTE(review): unlike the rdflg/rdspd/rdsta decodes below, saddr[13] is not
  // qualified here - confirm that is intended.
  wire wreg = (swr && scs && saddr[15] && !saddr[14]);

  // route set/clear 
  always @ (posedge ioclk) begin
    rrst <= (wreg && saddr[4:2]==0);
    rflg <= (wreg && saddr[4:2]==1);
    rset <= (wreg && saddr[4:2]==2);
    rclr <= (wreg && saddr[4:2]==3);
    rcnt <= (wreg && saddr[4:2]==4);
    rspd <= (wreg && saddr[4:2]==5);
    rnio <= (wreg && saddr[4:2]==6) && saddr[1:0]==0;
    rone <= (wreg && saddr[4:2]==6) && saddr[1:0]==1;
    ronu <= (wreg && saddr[4:2]==6) && saddr[1:0]==2;
    rfdc <= (wreg && saddr[4:2]==7);
    // rsto starts on rrst and holds itself until rstcnt reaches 7
    rsto <= rrst || (rsto && rstcnt!=7); // triggers rst
    // rstcnt counts only while rsto or rxt is active
    if (!(rsto||rxt)) rstcnt <= 0; else rstcnt <= rstcnt+1;
    // swrbus is sampled one ioclk after the decode, so it is presumably held
    // stable across the write - TODO confirm against the bus master
    if (rst) flags <= 0; else if (rflg) flags <= swrbus;
    // onden bits are only ever OR-ed in here; cleared by rst
    if (rst) onden <= 0; else if (rone) onden <= onden | swrbus[31:24];
    // ondup is a one-cycle pulse of the written byte
    if (!ronu) ondup <= 0; else ondup <= swrbus[31:24];
    // low half: set by rset mask, cleared by rclr mask
    if (rst) route[15:0] <= 0; else route[15:0] <= ({16{rset}} & swrbus[15:0])  | (route[15:0] & ~({16{rclr}} & swrbus[15:0]));
    // high half: set by rset mask, cleared by croute (driven outside this block)
    if (rst) route[31:16]<= 0; else route[31:16]<= ({16{rset}} & swrbus[31:16]) | (route[31:16] & ~croute[31:16]); 
    // rxt starts when a route set/clear write has the R2HP bit set while the
    // TMUXC flag is clear, and holds itself until rstcnt reaches 7
    rxt  <= ((rclr|rset) && swrbus[`R2HP] && !flags[`TMUXC]) || (rxt && rstcnt!=7);
    rdcal <= scs && saddr[13] && (MB||PC);
    rdflg <= scs && saddr[15] && !saddr[14] && !saddr[13] && saddr[4:2]==1;
    rdspd <= scs && saddr[15] && !saddr[14] && !saddr[13] && saddr[4:2]==5;
    rdsta <= scs && saddr[15] && !saddr[14] && !saddr[13] && saddr[4:2]==7;
    // readback image of route: top byte OR-ed with qrdym[7:0]; low byte is
    // replaced by calib while rdcal is set
    oroute <= { route[31:24] | qrdym[7:0], route[23:8], rdcal? calib : route[7:0]};
  end  

  // HX mux control
  // Flag-register taps that are the same for every build variant.
  wire tmuxa = flags[`TMUXA];
  wire tmuxb = flags[`TMUXB];
  wire tmuxc = flags[`TMUXC];
  wire cxmix = flags[`CXMIX];
  wire hxfdp = flags[`HXFDP];
  wire cxfdp = flags[`CXFDP];
  // on ICE_MB builds txfdp should stay low so TA1/TA2 or TB1/TB2 can clear
  // ahead of direct taod|tbod cycle
  wire txfdp = flags[`TXFDP];
  wire cxaob = flags[`CXAOB];
  wire hxaob = flags[`HXAOB];
  wire txaob = flags[`TXAOB];

  // Variant-specific controls: each signal comes from the crossbar config
  // word / flags on one build and is tied off on the other.
`ifdef ICE_MB
  wire qmux  = mcfga[35];	// QDR muxx mode
  wire bmux  = mcfga[31];
  wire hxmix = mcfga[21];
  wire hxfmt = (mcfga[9:8]==0);
  wire hpfdp = flags[`HPFDP];
  wire ta2hx = MB? flags[`TA2HX] : 0;
  wire tb2hx = MB? flags[`TB2HX] : 0;
  wire tx2hx = L;
  wire cx2hx = L;
  wire tx2cx = L;
  wire ta2ha = route[`TA2HA] && PC;
  wire tb2hb = route[`TB2HB] && PC;
  // HX-to-RAM route is active together with some direct source into that HX
  wire biha  = route[`HA2R] && (ta2hx || tb2hx || route[`CA2HA] || ta2ha);
  wire bihb  = route[`HB2R] && (ta2hx || tb2hx || route[`CB2HB] || tb2hb);
`else
  wire qmux  = L;
  wire bmux  = L;
  wire hxmix = L;
  wire hxfmt = L;
  wire hpfdp = L;
  wire ta2hx = L;
  wire tb2hx = L;
  wire tx2hx = flags[`TX2HX];
  wire cx2hx = flags[`CX2HX];
  wire tx2cx = flags[`TX2CX];
  wire ta2ha = route[`TA2HA];
  wire tb2hb = route[`TB2HB];
  wire biha  = L;
  wire bihb  = L;
`endif

  // Network (NIO) path selects: taken from the flags register when DMA_NIO
  // is defined, tied low otherwise.  nioena = {any path into NIO, any path
  // out of NIO}.
`ifdef DMA_NIO
  wire nio2m  = flags[`NIO2M];
  wire m2nio  = flags[`M2NIO];
  wire nio2hx = flags[`NIO2HX];
  wire hx2nio = flags[`HX2NIO];
  wire txtag  = flags[`TXTAG];
  assign nioena={m2nio|hx2nio,nio2m|nio2hx};
`else
  wire nio2m  = L;
  wire m2nio  = L;
  wire nio2hx = L;
  wire hx2nio = L;
  wire txtag  = L;
`endif

  // mbx follows MB only on PIC8 builds; it gates the taux/tbux/tauz/tbuz
  // aux-channel terms further down.
`ifdef PIC8
  wire mbx = MB;
`else
  wire mbx = L;
`endif

  // NVME path selects are compile-time constants: all off without DMA_NVM,
  // HX<->NV on ICE_PM builds, memory<->NV otherwise.
`ifndef DMA_NVM
  wire nv2m=L, m2nv=L, nv2hx=L, hx2nv=L;
`elsif ICE_PM
  wire nv2m=L, m2nv=L, nv2hx=H, hx2nv=H;
`else
  wire nv2m=H, m2nv=H, nv2hx=L, hx2nv=L;
`endif

  // PCIe direct IO: only the dio2p direction is flag-controlled; p2dio is
  // tied low in both variants.
`ifdef DMA_DIO
  wire dio2p = flags[`DIO2P];
  wire p2dio = L;
`else
  wire dio2p = L;
  wire p2dio = L;
`endif

  // Optional core A<->B loop routes
`ifdef DMA_CX2CX
  wire ca2cb = flags[`CA2CB];
  wire cb2ca = flags[`CB2CA];
`else
  wire ca2cb = L;
  wire cb2ca = L;
`endif

  // Optional tuner A<->B loop routes (DMA_TX2TX is defined above for the
  // default, non-ICE_PM / non-NOPM build)
`ifdef DMA_TX2TX
  wire ta2tb = flags[`TA2TB];
  wire tb2ta = flags[`TB2TA];
`else
  wire ta2tb = L;
  wire tb2ta = L;
`endif

  // ictena/ictenb come from bit 17 of the tuner output select busses when
  // DMA_TICS is defined
`ifdef DMA_TICS
  wire ictena=oselta[17];
  wire ictenb=oseltb[17];
`else
  wire ictena=L;
  wire ictenb=L;
`endif
  wire txout = ictenb | ictena;
  // per-port ICT status nets; they are not driven in this part of the file
  wire ictrdya,ictqueva,ictselva; wire [3:0] ictquexa,ictselxa; wire [2:0] cnta; wire stata;
  wire ictrdyb,ictquevb,ictselvb; wire [3:0] ictquexb,ictselxb; wire [2:0] cntb; wire statb;

  // qena[n] masks the input-status term of the rirdy* postfetch requests
  // (bit order TB,TA,CB,CA,HB,HA,HP,PR); when a bit is low that request
  // ignores istat/istax
`ifdef ICE_MB
  wire [7:0] qena = {!ictenb,!ictena,H,H, mcfgb[0],mcfga[0],H,H};
`else
  wire [7:0] qena = {H,H,H,H, H,H,H,H};
`endif

  // don't wait on HP for dio2p now 
  // memory "full" indication: with m2nio set, also report full while the
  // NIO input status bit is low
  wire afull = m2nio? !istat[`NIO] || bfull : bfull;
  // just copy to mem if !HP
  // dio2p_ is dio2p qualified by the HP input status, sampled at each ostart
  reg dio2p_; always @ (posedge ioclk) if (ostart) dio2p_ <= dio2p && istat[`HP];

  // speedometer
  // Counts lock1 slots in which one selected enable is active.  A write to
  // the speedometer register (rspd) latches the enable index from the low
  // five bits of swrbus and clears the counter.
  reg        rspdact;
  reg [4:0]  rspdadr;
  reg [27:0] rspdcnt;
  wire [3:0] rspdsta = LBW;
  // bits 31:16 are input enables, bits 15:0 the matching output enables
  wire [31:0] ractive = {
    4'd0, ienanv, ienan, ienad, ienam,
    ienatb, ienata, ienacb, ienaca, ienahb&&!hxmix, ienaha, ienah, ienap,
    4'd0, oenanv, oenan, oenad, oenam,
    oenatb, oenata, oenacb, oenaca, oenahb&&!hxmix, oenaha, oenah, oenap
  };
  always @ (posedge ioclk) begin
    if (rspd) begin
      rspdadr <= swrbus[4:0];
      rspdact <= 0;
      rspdcnt <= 0;
    end
    else begin
      rspdact <= lock1 && ractive[rspdadr];
      if (rspdact) rspdcnt <= rspdcnt+1;
    end
  end

  // flags readback
  // Effective (post-ifdef) routing controls, returned on srdbus when rdflg
  // is set.  Bit positions, MSB first:
  //   31:24  hx2nv nv2hx hx2nio nio2hx m2nv nv2m m2nio nio2m
  //   23:16  txtag cxmix p2dio dio2p tb2hx ta2hx txaob hxaob
  //   15:8   cxaob tx2cx cx2hx tx2hx tb2ta ta2tb cb2ca ca2cb
  //    7:0   txfdp cxfdp hxfdp hpfdp txout tmuxc tmuxb tmuxa
  wire [31:0] aflags = {hx2nv,nv2hx,hx2nio,nio2hx,m2nv,nv2m,m2nio,nio2m,
			txtag,cxmix,p2dio,dio2p,tb2hx,ta2hx,txaob,hxaob,
			cxaob,tx2cx,cx2hx,tx2hx,tb2ta,ta2tb,cb2ca,ca2cb,
			txfdp,cxfdp,hxfdp,hpfdp,txout,tmuxc,tmuxb,tmuxa};

  // framed decimation circuit
  // With DMA_FDEC, routx[HA2R]/routx[HB2R] are the HX-to-RAM route bits gated
  // by a per-port counter pair; without it routx is simply route.
  // fdecX register fields as used below: bit 28 enables counting,
  // bits [27:16+LBW] reload framX, bits [15:0] reload fcntX.
`ifdef DMA_FDEC
  reg [11:LBW] frama,framb;
  reg [15:0] fcnta,fcntb;
  reg [31:0] routx,fdeca,fdecb;
  reg rfdca,rfdcb;
  reg ffdeca,ffdecb,fddeca,fddecb;
  reg framaz,frambz,fcntaz,fcntbz;
  // one step per HX output cycle (oenahX at ostartd) while enabled
  wire fgoa = oenaha && ostartd && fdeca[28];
  wire fgob = oenahb && ostartd && fdecb[28];
  // framX reloads when it was zero (registered compare) on a step
  wire ffloada = framaz && fgoa;
  wire ffloadb = frambz && fgob;
  // fcntX reloads when it was zero on a framX reload
  wire fdloada = fcntaz && ffloada;
  wire fdloadb = fcntbz && ffloadb;
  always @ (posedge ioclk) begin
    // saddr[0] selects the A (1) or B (0) register of the rfdc write
    rfdca  <= (rfdc &&  saddr[0]);
    rfdcb  <= (rfdc && !saddr[0]);
    framaz <= (frama==0);
    frambz <= (framb==0);
    fcntaz <= (fcnta==0);
    fcntbz <= (fcntb==0);
    if (rst) fdeca <= 0; else if (rfdc &&  saddr[0]) fdeca <= swrbus;
    if (rst) fdecb <= 0; else if (rfdc && !saddr[0]) fdecb <= swrbus;
    // framX: reload on wrap or one cycle after a register write, else count down per step
    if (rst) frama <= 0; else if (ffloada|rfdca) frama <= fdeca[27:16+LBW]; else if (fgoa) frama <= frama-1;
    if (rst) framb <= 0; else if (ffloadb|rfdcb) framb <= fdecb[27:16+LBW]; else if (fgob) framb <= framb-1;
    // fcntX: reload on wrap, else count down once per framX reload
    if (rst) fcnta <= 0; else if (fdloada) fcnta <= fdeca[15:0];      else if (ffloada) fcnta <= fcnta-1;
    if (rst) fcntb <= 0; else if (fdloadb) fcntb <= fdecb[15:0];      else if (ffloadb) fcntb <= fcntb-1;
    // HX-to-RAM route is presented only while fcntX is zero
    // NOTE(review): only these two bits of routx are assigned in this block;
    // other routx bits must not be read in DMA_FDEC builds - confirm.
    routx[`HA2R] <= route[`HA2R] && (fcnta==0);
    routx[`HB2R] <= route[`HB2R] && (fcntb==0);
  end
`else
  wire [31:0] routx = route;
`endif

  // HostPort channel
  // hprst pulses for one cycle after a write to the rcnt register, and the
  // same write loads hpchn from swrbus.
  reg hprst, p2rdy;
  reg [5:0] hpchn;
  always @ (posedge ioclk) begin
    p2rdy <= ostat[`HP];
    hprst <= rcnt;
`ifdef PIC5
    // 4-bit value is zero-extended into the 6-bit hpchn
    if (rcnt) hpchn <= swrbus[3:0];
`else
    // only hpchn[1:0] is software-written; [3:2] mirror the HP route bits
    // and [5:4] mirror two qrdym bits, refreshed every clock
    if (rcnt) hpchn[1:0] <= swrbus[1:0];
    hpchn[3:2] <= {route[`HP2R],route[`R2HP]};
    hpchn[5:4] <= {qrdym[1],qrdym[17]};
`endif
  end  

  // calibration checker taps the memory output bus on ICE_MB builds; calib
  // is tied to zero otherwise (ctest is then undriven here)
  wire [3:0] ctest;
`ifdef ICE_MB
  // most resource efficient way to test Hyper IFs
  calibrate #(64) ocm (ioclk,oenamx,obusm, calib,ctest);
`else
  assign calib = 0;
`endif

  reg [15:0] istax,ostax;
  // system read data mux, highest priority first:
  //   rdspd -> speedometer {rspdsta,rspdcnt}, rdsta -> {ostat,istat},
  //   rdflg -> aflags, otherwise oroute
  assign srdbus = rdspd? {rspdsta,rspdcnt} : rdsta? {ostat,istat} : rdflg? aflags : oroute;
//  assign srdbus = rdspd? {16'h1ce0,ondok,onden} : rdsta? {ostat,istat} : rdflg? aflags : oroute;

  // combined conditioners 
  // ostax: registered output-ready status.  In hxmix/cxmix mode an A or B
  // port is ready only when its partner is ready too.  PR also reports ready
  // when NIO has data, nio2m is set and the PR2R route is on.
  always @ (posedge ioclk) begin
    ostax[`PR] <= (nio2m && ostat[`NIO] && route[`PR2R]) || ostat[`PR];
    ostax[`HA] <= ostat[`HA] && (!hxmix || ostat[`HB]);
    ostax[`HB] <= ostat[`HB] && (!hxmix || ostat[`HA]);
    ostax[`CA] <= ostat[`CA] && (!cxmix || ostat[`CB]);
    ostax[`CB] <= ostat[`CB] && (!cxmix || ostat[`CA]);
  end
  // istax: registered input-ready status.  Same A/B pairing as above, plus
  // an on-demand gate (if onden[x] is set, ondok[x] must be set too) and,
  // for the tuner ports, the ICT ready gate when ictena/ictenb is set.
  always @ (posedge ioclk) begin
    istax[`PR] <= istat[`PR]; 
    istax[`HP] <= istat[`HP]; 
    istax[`HA] <= istat[`HA] && (!hxmix || istat[`HB]) && (!onden[`HA] || ondok[`HA]);
    istax[`HB] <= istat[`HB] && (!hxmix || istat[`HA]) && (!onden[`HB] || ondok[`HB]);
    istax[`CA] <= istat[`CA] && (!cxmix || istat[`CB]) && (!onden[`CA] || ondok[`CA]); 
    istax[`CB] <= istat[`CB] && (!cxmix || istat[`CA]) && (!onden[`CB] || ondok[`CB]);
    istax[`TA] <= istat[`TA] &&	(!ictena || ictrdya)   && (!onden[`TA] || ondok[`TA]);
    istax[`TB] <= istat[`TB] &&	(!ictenb || ictrdyb)   && (!onden[`TB] || ondok[`TB]);
    // secondary status bits: use the dedicated istat bit only in the pure
    // PM (HA1/HB1) or pure MB (TA1/TA2/TB1/TB2) configuration, otherwise
    // fall back to the primary port status
    istax[`HA1] <= (PM&!PC)? istat[`HA1] : istat[`HA];
    istax[`HB1] <= (PM&!PC)? istat[`HB1] : istat[`HB];
    istax[`TA1] <= (MB&!PC)? istat[`TA1] : istat[`TA];
    istax[`TA2] <= (MB&!PC)? istat[`TA2] : istat[`TA];
    istax[`TB1] <= (MB&!PC)? istat[`TB1] : istat[`TB];
    istax[`TB2] <= (MB&!PC)? istat[`TB2] : istat[`TB];
  end
  
  // 1-to-N wait conditioners (all must go at once)
  // For each source port xx:
  //   xxow = source must wait: the source has no data, its previous cycle is
  //          still enabled (unless the matching *fdp flag is set), or any
  //          selected destination is not ready
  //   xxod = at least one direct (non-RAM) destination is selected
  // (the ow/od reading of the names is inferred from these expressions)
  reg haow,hbow,haod,hbod, caow,cbow,caod,cbod, taow,tbow,taod,tbod, taux,tbux, tauz,tbuz, ota_selc,otb_selc;
  always @ (posedge ioclk) begin
    haow <= (routx[`HA2R]&&afull) || (oenaha && !hxfdp) || !ostat[`HA] || (hx2nio && !istat[`NIO])
         || (route[`HA2CA]&&!istax[`CA])  || (route[`HA2CB]&&!istax[`CB])
         || (route[`HA2TA]&&!istax[`TA1]) || (route[`HA2TB]&&!istax[`TB1]);
    hbow <= (routx[`HB2R]&&afull) || (oenahb && !hxfdp) || !ostat[`HB] || (hx2nio && !istat[`NIO])
         || (route[`HB2CA]&&!istax[`CA])  || (route[`HB2CB]&&!istax[`CB])
         || (route[`HB2TA]&&!istax[`TA2]) || (route[`HB2TB]&&!istax[`TB2]);
    // an HX2R route that is currently masked in routx also counts as direct,
    // so the data is still taken from the port
    haod <= (route[`HA2CA] || route[`HA2CB] || route[`HA2TA] || route[`HA2TB]) || (route[`HA2R] && !routx[`HA2R]) || hx2nio;
    hbod <= (route[`HB2CA] || route[`HB2CB] || route[`HB2TA] || route[`HB2TB]) || (route[`HB2R] && !routx[`HB2R]) || (hx2nio && !hxmix);

    // cross routes (CA2HB, CA2TB, CB2HA, CB2TA) are honoured only when MB
    caow <= (route[`CA2R]&&afull) || (oenaca && !cxfdp) || !ostat[`CA] || (ca2cb&&!istax[`CB])
         || (route[`CA2HA]&&!istax[`HA1]) || (MB&&route[`CA2HB]&&!istax[`HB1])
         || (route[`CA2TA]&&!istax[`TA1]) || (MB&&route[`CA2TB]&&!istax[`TB1]);
    cbow <= (route[`CB2R]&&afull) || (oenacb && !cxfdp) || !ostat[`CB] || (cb2ca&&!istax[`CA])
         || (route[`CB2HB]&&!istax[`HB1]) || (MB&&route[`CB2HA]&&!istax[`HA1])
         || (route[`CB2TB]&&!istax[`TB2]) || (MB&&route[`CB2TA]&&!istax[`TA2]);
    caod <= (route[`CA2HA] || (MB&&route[`CA2HB]) || route[`CA2TA] || (MB&&route[`CA2TB]) || ca2cb);
    cbod <= (route[`CB2HB] || (MB&&route[`CB2HA]) || route[`CB2TB] || (MB&&route[`CB2TA]) || cb2ca);

    // MB only: the tuner port's second sub-channel (TA2/TB2) is ready and HB
    // can accept; later selects between the HA and HB destinations
    ota_selc <= MB? ostat[`TA2] && istax[`HB] : L;
    otb_selc <= MB? ostat[`TB2] && istax[`HB] : L;
    // MB: wait unless sub-channel 1 can go to HA or sub-channel 2 can go to HB
    // otherwise: same pattern as the HX/CX conditioners above
    taow <= MB? (oenata && !txfdp) || !((ostat[`TA1] && istax[`HA]) || (ostat[`TA2] && istax[`HB])) || (ta2tb && !istax[`TB]) 
       : (route[`TA2R]&&afull) || (oenata && !txfdp) || !ostat[`TA] || (route[`TA2CA]&&!istax[`CA]) || (ta2ha&&!istax[`HA]);
    tbow <= MB? (oenatb && !txfdp) || !((ostat[`TB1] && istax[`HA]) || (ostat[`TB2] && istax[`HB])) || (tb2ta && !istax[`TA])
       : (route[`TB2R]&&afull) || (oenatb && !txfdp) || !ostat[`TB] || (route[`TB2CB]&&!istax[`CB]) || (tb2hb&&!istax[`HB]);
    taod <= MB? ta2hx : (route[`TA2CA] || ta2ha); 
    tbod <= MB? tb2hx : (route[`TB2CB] || tb2hb);
    // the aux channels for NIO/NVME paths
    // (select bits 7:6 == 3, i.e. multichannel + auxiliary per the header)
    taux <= mbx? (oselta[7:6]==3) : L;
    tbux <= mbx? (oseltb[7:6]==3) : L;
    // bleed off old data if TX2R is disabled
    tauz <= mbx? !route[`TA2R] && !oenata && ostat[`TA] && (oselta[7:6]==2) : L;
    tbuz <= mbx? !route[`TB2R] && !oenatb && ostat[`TB] && (oseltb[7:6]==2) : L;
  end

  // possible multi-source input resolution TX2HX and CX2HX
  // on PM, hold off a TX direct cycle while a CX->HX route could go
  wire txow = PM && (ta2ha||tb2hb) && ((route[`CA2HA]&!caow) || (route[`CB2HB]&!cbow));

  // direct output selectors
  // For each port pair (HX, CX, TX):
  //   *_sel_  picks the B side (1) or A side (0); the *aob flag gives A
  //           priority over B, otherwise B has priority
  //   *_act_  some side has a direct destination and need not wait
  //   *_actr_ the chosen side is also routed to RAM
  // Values are captured on lock1; the registered *_actr and the NV/NIO
  // activations follow on lock2.
  reg oph_sel,oph_act,ocx_sel,ocx_act,ohx_sel,ohx_act,otx_sel,otx_act,oca_act,ocb_act,ocx_actr,ohx_actr,otx_actr,otx_selc,onv_act,oni_act;
  wire ohx_sel_  = hxaob? !(haod && !haow) : (hbod && !hbow);
  wire ocx_sel_  = cxaob? !(caod && !caow) : (cbod && !cbow);
  wire otx_sel_  = txaob? !(taod && !taow) : (tbod && !tbow);
  wire ocx_act_  = (caod && !caow) || (cbod && !cbow);
  wire ohx_act_  = (haod && !haow) || (hbod && !hbow);
  wire otx_act_  = (taod && !taow) || (tbod && !tbow);
  wire ohx_actr_ = ohx_act_ && (ohx_sel_?routx[`HB2R]:routx[`HA2R]);
  wire ocx_actr_ = ocx_act_ && (ocx_sel_?route[`CB2R]:route[`CA2R]);
  wire otx_actr_ = otx_act_ && (otx_sel_?route[`TB2R]:route[`TA2R]) && PM;  // to allow TX2R and TX2HX simultaneously on PM
  always @ (posedge ioclk) begin
   if (lock1) begin
    // PH direct output path is held off
    oph_sel <= L;
    oph_act <= L;
    ohx_sel <= ohx_sel_;
    ohx_act <= ohx_act_;
    ocx_sel <= ocx_sel_;
    // CX yields when both CX and HX direct cycles would also write RAM
    ocx_act <= ocx_act_ && !(ocx_actr_ && ohx_actr_);
    otx_sel <= otx_sel_;
    // TX yields to a RAM-write conflict with HX/CX, to txow, and to any CX
    // direct cycle
    otx_act <= otx_act_ && !(otx_actr_ && (ohx_actr_||ocx_actr_)) && !txow && !ocx_act_;
    // CX loopback is taken only when that side is not also routed to RAM
    oca_act <= ca2cb && ostat[`CA] && istax[`CB] && !route[`CA2R];
    ocb_act <= cb2ca && ostat[`CB] && istax[`CA] && !route[`CB2R];
    otx_selc <= otx_sel_? otb_selc : ota_selc;
   end
   if (lock2) ohx_actr <= ohx_act && (ohx_sel?routx[`HB2R]:routx[`HA2R]);
   if (lock2) ocx_actr <= ocx_act && (ocx_sel?route[`CB2R]:route[`CA2R]);
   if (lock2) otx_actr <= otx_act && (otx_sel?route[`TB2R]:route[`TA2R]);
   // NV/NIO -> HA cycles need HA free of TX and CX direct traffic; the
   // !onv_act / !oni_act terms keep either from being active on two
   // consecutive lock2 slots
   if (lock2) onv_act  <= nv2hx  && ostat[`NV]  && istat[`HA] && !otx_act && !ocx_act && !onv_act;
   // NOTE(review): the "negates onv_act" interlock is not visible in this
   // block - confirm where it is applied.
   if (lock2) oni_act  <= nio2hx && ostat[`NIO] && istat[`HA] && !otx_act && !ocx_act && !oni_act;	// oni_act negates onv_act
  end

  // direct input selectors
  // On lock2, turn the direct-output decisions (ohx/ocx/otx _act and _sel)
  // into per-destination input activations.  Unsuffixed i**_act are fed from
  // HX (or, for iha/ihb, from TX); the *_act2 set is fed from the other
  // source type (TX for ica/icb, CX for iha/ihb/ita/itb).
  reg iph_act,iha_actd,ihb_actd; 
  reg ica_act,icb_act,iha_act,ihb_act,ita_act,itb_act,inv_act,ini_act;
  reg ica_act2,icb_act2,iha_act2,ihb_act2,ita_act2,itb_act2;
  always @ (posedge ioclk) begin
   if (lock2) begin
    iph_act  <= L;
    ini_act  <= hx2nio && ohx_act;
    inv_act  <= hx2nv && ostat[`HC] && istat[`NV] && !ohx_act;
    // MB: otx_selc picks HA (0) or HB (1); otherwise TA->HA and TB->HB only
    iha_act  <= MB? otx_act && !otx_selc : otx_act && (otx_sel?  L : ta2ha);
    ihb_act  <= MB? otx_act &&  otx_selc : otx_act && (otx_sel? tb2hb : L );
    // HX source: ohx_sel picks the HB (1) or HA (0) route bits
    ica_act  <= ohx_act && (ohx_sel?     route[`HB2CA] :     route[`HA2CA]);
    icb_act  <= ohx_act && (ohx_sel?     route[`HB2CB] :     route[`HA2CB]);
    ita_act  <= ohx_act && (ohx_sel?     route[`HB2TA] :     route[`HA2TA]);
    itb_act  <= ohx_act && (ohx_sel?     route[`HB2TB] :     route[`HA2TB]);
    // TX source into CX exists only on PM (TA->CA, TB->CB)
    ica_act2 <= otx_act && (otx_sel?     L             : PM&&route[`TA2CA]);
    icb_act2 <= otx_act && (otx_sel? PM&&route[`TB2CB] :     L            );
    // CX source: straight routes always, cross routes only on MB
    iha_act2 <= ocx_act && (ocx_sel? MB&&route[`CB2HA] :     route[`CA2HA]);
    ihb_act2 <= ocx_act && (ocx_sel?     route[`CB2HB] : MB&&route[`CA2HB]);
    ita_act2 <= ocx_act && (ocx_sel? MB&&route[`CB2TA] :     route[`CA2TA]);
    itb_act2 <= ocx_act && (ocx_sel?     route[`CB2TB] : MB&&route[`CA2TB]);
   end
  end
  // per-pair summaries of the activations above
  wire ihx_act  = iha_act || ihb_act;
  wire icx_act  = ica_act || icb_act;
  wire itx_act  = ita_act || itb_act;
  wire ihx_act2 = iha_act2 || ihb_act2;
  wire icx_act2 = ica_act2 || icb_act2;
  wire itx_act2 = ita_act2 || itb_act2;
  wire ihx_sel  = ihb_act;
  wire ihx_sel2 = ihb_act2;

  // PM loopback variables
  // (ita/itb_act3 and _act4 are not assigned in this part of the file)
  reg ita_act3,itb_act3;
  reg ita_act4,itb_act4;
  wire itx_act3 = ita_act3 || itb_act3;	// loop from one PM to the other - normal
  wire itx_act4 = ita_act4 || itb_act4;	// loop from one PM to the other - direct
  wire itx_actx = ita_act3 || itb_act3 || ita_act4 || itb_act4;

  // RAM control selectors
  // *_sel encoding used throughout: 0=PR 1=HP 2=HA 3=HB 4=CA 5=CB 6=TA 7=TB
  reg [2:0] ri_sel,qi_sel,ro_sel,ro_sel_,rprep;
  reg ri_act,qi_act,ro_act,ro_act_,rip_hold,iclear;

  // RAM input selectors (prefetch from RAM)
  // A port requests a prefetch when its RAM->port route is on, its qrdym
  // bit [8+n] is clear, the port can accept data and it was not the port
  // just selected (unless the matching *fdp flag is set).
  reg qirdyha,qirdyhb, qirdyta,qirdytb, qirdyp,qirdyh,qirdyca,qirdycb;
  // 3-bit priority encode of the requests
  wire _qi_sel2 = (qirdyca||qirdycb||qirdyta||qirdytb) && !(qirdyha||qirdyhb);
  wire _qi_sel1 = _qi_sel2? (qirdyta||qirdytb) : (qirdyha||qirdyhb);
  wire _qi_sel0 = _qi_sel2? (_qi_sel1? qirdytb:qirdycb) : (_qi_sel1? qirdyhb:qirdyh);
  always @ (posedge ioclk) begin
    qirdyp  <= !rst	    && !qrdym[ 8] && istat[`PR] && !rip_hold;
    qirdyh  <= route[`R2HP] && !qrdym[ 9] && istax[`HP] && !(qi_act && qi_sel==1 && !hpfdp);
    qirdyha <= route[`R2HA] && !qrdym[10] && istax[`HA] && !(qi_act && qi_sel==2 && !hxfdp);
    qirdyhb <= route[`R2HB] && !qrdym[11] && istax[`HB] && !(qi_act && qi_sel==3 && !hxfdp);
    qirdyca <= route[`R2CA] && !qrdym[12] && istax[`CA] && !(qi_act && qi_sel==4 && !cxfdp);
    qirdycb <= route[`R2CB] && !qrdym[13] && istax[`CB] && !(qi_act && qi_sel==5 && !cxfdp);
    qirdyta <= route[`R2TA] && !qrdym[14] && istax[`TA] && !(qi_act && qi_sel==6 && !txfdp);
    qirdytb <= route[`R2TB] && !qrdym[15] && istax[`TB] && !(qi_act && qi_sel==7 && !txfdp);
   if (lock3) begin
    // priority as encoded by _qi_sel*, highest first:
    //   HB, HA, TB, TA, CB, CA, HP, PR
    qi_act  <= (qirdyp||qirdyh||qirdyca||qirdycb||qirdyha||qirdyhb||qirdyta||qirdytb) && !iclear;
    qi_sel  <= {_qi_sel2,_qi_sel1,_qi_sel0};
   end
    // prevent multiple read requests from processor
    // set when PR wins a prefetch slot at lock4, held until one cycle after
    // ienap is seen
    if (rst) rip_hold <= 0; else rip_hold <= (lock4 && qi_act && qi_sel==0) || (rip_hold && !ienapd);
    ienapd <= ienap; // this delay is really important
  end

  // RAM input selectors (postfetch from RAM)
  // A port is ready for delivery when its qrdym bit [n] is set, the port can
  // accept data (or the check is masked by qena[n]==0) and it was not the
  // port just selected (unless the matching *fdp flag is set).
  reg rirdyha,rirdyhb, rirdyta,rirdytb, rirdyp,rirdyh,rirdyca,rirdycb;
  // drop the request while a direct route is already feeding that port pair
  wire rirdyha_ = rirdyha && !ihx_act && !ihx_act2 && !onv_act && !oni_act;
  wire rirdyhb_ = rirdyhb && !ihx_act && !ihx_act2 && !onv_act && !oni_act;
  wire rirdyca_ = rirdyca && !icx_act && !icx_act2;
  wire rirdycb_ = rirdycb && !icx_act && !icx_act2;
  wire rirdyta_ = rirdyta && !itx_act && !itx_act2;
  wire rirdytb_ = rirdytb && !itx_act && !itx_act2;
  // 3-bit priority encode, same shape as the prefetch encoder
  wire _ri_sel2 = (rirdyca_||rirdycb_||rirdyta_||rirdytb_) && !(rirdyha_||rirdyhb_);
  wire _ri_sel1 = _ri_sel2? (rirdyta_||rirdytb_) : (rirdyha_||rirdyhb_);
  wire _ri_sel0 = _ri_sel2? (_ri_sel1? rirdytb_:rirdycb_) : (_ri_sel1? rirdyhb_:rirdyh);
  wire _ri_act  = (rirdyp||rirdyh||rirdyca_||rirdycb_||rirdyha_||rirdyhb_||rirdyta_||rirdytb_);

  always @ (posedge ioclk) begin
    rirdyp  <= qrdym[0] && (istat[`PR]|!qena[0]); 
    rirdyh  <= qrdym[1] && (istax[`HP]|!qena[1]) && !(ri_act && ri_sel==1 && !hpfdp);
    rirdyha <= qrdym[2] && (istax[`HA]|!qena[2]) && !(ri_act && ri_sel==2 && !hxfdp);// && !ihx_act && !ihx_act2;
    rirdyhb <= qrdym[3] && (istax[`HB]|!qena[3]) && !(ri_act && ri_sel==3 && !hxfdp);// && !ihx_act && !ihx_act2;
    rirdyca <= qrdym[4] && (istax[`CA]|!qena[4]) && !(ri_act && ri_sel==4 && !cxfdp);// && !icx_act && !icx_act2;
    rirdycb <= qrdym[5] && (istax[`CB]|!qena[5]) && !(ri_act && ri_sel==5 && !cxfdp);// && !icx_act && !icx_act2;
    rirdyta <= qrdym[6] && (istax[`TA]|!qena[6]) && !(ri_act && ri_sel==6 && !txfdp);// && !itx_act && !itx_act2;
    rirdytb <= qrdym[7] && (istax[`TB]|!qena[7]) && !(ri_act && ri_sel==7 && !txfdp);// && !itx_act && !itx_act2;
   if (lock3) begin
    // priority as encoded by _ri_sel*, highest first:
    //   HB, HA, TB, TA, CB, CA, HP, PR
    ri_act <= _ri_act;
    ri_sel <= {_ri_sel2,_ri_sel1,_ri_sel0};
   end
  end

  // RAM output selectors (to RAM)
  // Picks which source port writes to RAM in the next slot (ro_act/ro_sel,
  // captured on lock4).
  reg rordyha,rordyhb, rordyta,rordytb, rordyp,rordyh,rordyca,rordycb,oclear,oenahp;
  // ha|hb must have highest priority to allow direct route combinations
  // resulting order, highest first: HB, HA, TB, TA, CB, CA, HP, PR
  wire _ro_sel2 = (rordyca||rordycb||rordyta||rordytb) && !(rordyha||rordyhb);
  wire _ro_sel1 = _ro_sel2? (rordyta||rordytb) : (rordyha||rordyhb);
  wire _ro_sel0 = _ro_sel2? (_ro_sel1? rordytb:rordycb) : (_ro_sel1? rordyhb:rordyh);
  // per-source hold-offs: RAM full, the source's own direct cycle, or another
  // pair's direct cycle that is also writing RAM
  wire phwait = afull || oph_act || ocx_actr || otx_actr || ohx_actr || onv_act || oni_act;
  wire cxwait = afull || ocx_act || otx_actr || ohx_actr;
  wire hxwait = afull || ohx_act || otx_actr || ocx_actr;
  // biha/bihb: HX port is being both written (direct input) and read to RAM
  wire hawait = biha && (iha_act || iha_act2 || iha_actd);
  wire hbwait = bihb && (ihb_act || ihb_act2 || ihb_actd);
  wire txwait = afull || otx_act || ohx_actr || ocx_actr;
  // on MB a TX loop route also needs the other tuner port free and ready
  wire tawait = txwait || (MB && ta2tb && (!istax[`TB] || itx_act || itx_act2));
  wire tbwait = txwait || (MB && tb2ta && (!istax[`TA] || itx_act || itx_act2));
  always @ (posedge ioclk) begin
    rordyp  <= !rst         && 					(ostax[`PR] &&                     !phwait);
    rordyh  <= route[`HP2R] && 					(ostat[`HP] && (!oenah ||hpfdp) && !phwait);
    // HX/CX: when the port also has a direct destination (xxod) the RAM write
    // rides on the direct cycle chosen above; otherwise use the plain
    // ready / not-busy / not-waiting test
    rordyha <= routx[`HA2R] && (haod? (ohx_act && !ohx_sel) :	(ostax[`HA] && (!oenaha||hxfdp) && !hxwait && !hawait));
    rordyhb <= routx[`HB2R] && (hbod? (ohx_act &&  ohx_sel) :	(ostax[`HB] && (!oenahb||hxfdp) && !hxwait && !hbwait));
    rordyca <= route[`CA2R] && (caod? (ocx_act && !ocx_sel) :	(ostax[`CA] && (!oenaca||cxfdp) && !cxwait));
    rordycb <= route[`CB2R] && (cbod? (ocx_act &&  ocx_sel) :	(ostax[`CB] && (!oenacb||cxfdp) && !cxwait));
    // TX: the aux channel indication (taux/tbux) requests RAM even without TX2R
    rordyta <= (route[`TA2R] || taux) && 			(ostat[`TA] && (!oenata||txfdp) && !tawait);
    rordytb <= (route[`TB2R] || tbux) && 			(ostat[`TB] && (!oenatb||txfdp) && !tbwait);
   if (lock4) begin
    ro_act  <= (rordyp||rordyh||rordyca||rordycb||rordyha||rordyhb||rordyta||rordytb);
    ro_sel  <= {_ro_sel2,_ro_sel1,_ro_sel0};
    // true exactly when the encoder above resolves to HP (sel == 1)
    oenahp  <= rordyh && !_ro_sel2 && !_ro_sel1;	// pre oenah
   end
  end

  // processor interfaces (sizep 0=4by 1=16by 2=32by 3=64by)
  // isz/osz latch the requested transfer size at the start of an input or
  // output slot.  ipdone/opdone mark the count value at which a transfer of
  // that size ends; size 0 is done immediately.
  reg [1:0] isz, osz;

  // 32-bit bus: compare against the full 4-bit count
  wire ipdone32 = (isz==0)
               || (isz==1 && count[3:0]==5)
               || (isz==2 && count[3:0]==9)
               || (isz==3 && count[3:0]==1);
  wire opdone32 = (osz==0)
               || (osz==1 && count[3:0]==3)
               || (osz==2 && count[3:0]==7)
               || (osz==3 && count[3:0]==15);

  // 64-bit and wider bus: compare against the low 3 bits of count
  wire ipdone64 = (isz==0)
               || (isz==1 && count[2:0]==3)
               || (isz==2 && count[2:0]==5)
               || (isz==3 && count[2:0]==1);
  wire opdone64 = (osz==0)
               || (osz==1 && count[2:0]==1)
               || (osz==2 && count[2:0]==3)
               || (osz==3 && count[2:0]==7);

  wire ipdone   = (IBW>=64)? ipdone64 : ipdone32;
  wire opdone   = (IBW>=64)? opdone64 : opdone32;

  always @ (posedge ioclk) begin
    if (istart) isz <= sizep;
    if (ostart) osz <= sizep;

    // enables start when PR (sel 0) owns the slot and hold until done
    if (rst) begin
      ienap <= 0;
      oenap <= 0;
    end
    else begin
      ienap <= (ri_act && ri_sel==0 && istart) || (ienap && !ipdone);
      oenap <= (ro_act && ro_sel==0 && ostart && ostat[`PR]) || (oenap && !opdone);
    end
  end

  // memory interfaces
  // jbusm is ibusm passed through the ff instance, parameterised by
  // DMA_PIPE_MIBUS; when that macro is non-zero the earlier ostars strobe is
  // used for ienam instead of ostart.
  wire [IBW-1:0] jbusm;
  wire ostarx = (`DMA_PIPE_MIBUS)? ostars : ostart;
  ff #(IBW,`DMA_PIPE_MIBUS) momm (jbusm,ibusm,ioclk);
  always @ (posedge ioclk) begin
    if (rst) begin
      ienam  <= 0;
      oenam  <= 0;
      oenamx <= 0;
    end
    else begin
      // read-from-memory enable, suppressed for p2dio
      if (ostarx)  ienam  <= ri_act && !p2dio;
      // write-to-memory enable, suppressed for dio2p_
      if (istart)  oenam  <= ro_act && !dio2p_;
      // one-cycle-later copy without the dio2p_ gate
      if (istartd) oenamx <= ro_act;
    end
  end

  // B side channel dupe for AB mux
  // In hxmix/cxmix mode the B port answers to the A port's select code.
  wire [2:0] hm = hxmix? 2:3;
  wire [2:0] cm = cxmix? 4:5;

  // input enables
  // Loaded at istart: a port is enabled when RAM delivery selected it
  // (ri_act with matching ri_sel) or a direct route targets it.
  always @ (posedge ioclk) begin
    if (rst) begin
      ienanv <= 0;
      ienah  <= 0;
      ienaha <= 0;
      ienahb <= 0;
      ienaca <= 0;
      ienacb <= 0;
      ienata <= 0;
      ienatb <= 0;
    end
    else if (istart) begin
      ienanv <= inv_act;
      ienah  <= (ri_act && ri_sel==1);
      ienaha <= (ri_act && ri_sel==2)  || iha_act || iha_act2 || onv_act || oni_act;
      ienahb <= (ri_act && ri_sel==hm) || ihb_act || ihb_act2 || (hxmix&&(iha_act||iha_act2));
      ienaca <= (ri_act && ri_sel==4)  || ica_act || ica_act2;
      ienacb <= (ri_act && ri_sel==cm) || icb_act || icb_act2 || (cxmix&&(ica_act||ica_act2));
      ienata <= (ri_act && ri_sel==6)  || ita_act || ita_act2;
      ienatb <= (ri_act && ri_sel==7)  || itb_act || itb_act2;
    end

    // direct-input history for the HX ports (no reset term)
    if (istart) begin
      iha_actd <= iha_act || iha_act2;
      ihb_actd <= ihb_act || ihb_act2;
    end
  end

  // HX blockmux 
  // ICE_MB only: while bmux (mcfga[31]) is set, bcnt counts HX output cycles
  // and bsel is taken from bit 4-LBW of that count.  Both registers are held
  // at zero when bmux is clear; on other builds bsel is tied low.
`ifdef ICE_MB
  reg shdr;
  reg [4:0] bcnt;
  wire bsel = bcnt[4-LBW];
  // count an HX output cycle, except while shdr is set
  wire binc = setomux && (oenaha|oenahb) && !shdr;
  wire bcyc = setomux && (oenaha|oenahb);
  always @ (posedge ioclk) begin
    if (!bmux) bcnt <= 0; else if (binc) bcnt <= bcnt+1;
    // with mcfga[13] set, shdr is raised for one HX cycle when the low count
    // bits equal 1, which skips one increment
    // NOTE(review): presumably a header-skip slot - confirm
    if (!bmux) shdr <= 0; else if (bcyc) shdr <= (bcnt[3-LBW:0]==1) && mcfga[13] && !shdr;
  end
`else
  wire bsel=L;
`endif

`ifdef ICE_MB
  // TX multichannel port logic
  // Each tuner port has three output and three input enables:
  //   *0 = RAM path, *1 / *2 = direct paths (otx_selc 0 / 1 on the output
  //   side, ohx_sel / ocx_sel 0 / 1 on the input side).
  reg oenata0, oenata1, oenata2;
  reg oenatb0, oenatb1, oenatb2;
  reg ienata0, ienata1, ienata2;
  reg ienatb0, ienatb1, ienatb2;
  // a port fed by the TX loop route loads its input enables at ostartd
  wire istrta = tb2ta? ostartd : istartd;
  wire istrtb = ta2tb? ostartd : istartd;
  always @ (posedge ioclk) begin
    if (rst) begin
      oenata0 <= 0;  oenata1 <= 0;  oenata2 <= 0;
      oenatb0 <= 0;  oenatb1 <= 0;  oenatb2 <= 0;
      ienata0 <= 0;  ienata1 <= 0;  ienata2 <= 0;
      ienatb0 <= 0;  ienatb1 <= 0;  ienatb2 <= 0;
    end
    else begin
      // output enables
      if (ostart) begin
        oenata0 <= (ro_act && ro_sel==6) || (tauz && !otx_act);
        oenata1 <= (otx_act && !otx_sel && !otx_selc);
        oenata2 <= (otx_act && !otx_sel &&  otx_selc);
        oenatb0 <= (ro_act && ro_sel==7) || (tbuz && !otx_act);
        oenatb1 <= (otx_act &&  otx_sel && !otx_selc);
        oenatb2 <= (otx_act &&  otx_sel &&  otx_selc);
      end
      // port A input enables
      if (istrta) begin
        ienata0 <= (ri_act && ri_sel==6);
        ienata1 <= (ita_act && !ohx_sel) || (ita_act2 && !ocx_sel) || (tb2ta && oenatb1);
        ienata2 <= (ita_act &&  ohx_sel) || (ita_act2 &&  ocx_sel) || (tb2ta && oenatb2);
      end
      // port B input enables
      if (istrtb) begin
        ienatb0 <= (ri_act && ri_sel==7);
        ienatb1 <= (itb_act && !ohx_sel) || (itb_act2 && !ocx_sel) || (ta2tb && oenata1);
        ienatb2 <= (itb_act &&  ohx_sel) || (itb_act2 &&  ocx_sel) || (ta2tb && oenata2);
      end
    end
  end
  // NOTE(review): each concatenation is 7 bits wide and the ports are 8 bits,
  // so bit 7 is implicitly zero - confirm that is intended.
  assign mcena = MB? {ienata2,ienata1,ienata0,ta2hx,oenata2,oenata1,oenata0} : 0;
  assign mcenb = MB? {ienatb2,ienatb1,ienatb0,tb2hx,oenatb2,oenatb1,oenatb0} : 0;
`else
  // Hypertransport Interface logic
  wire ha2td,hb2td; wire [2:0] hrdyt; 
  reg ha2t,hb2t,ha2x,hb2x,rha,rhb,iholda,iholdb,iholdc;
  delaypipe #(15,1) dpa (ioclk,ha2t,ha2td);
  delaypipe #(15,1) dpb (ioclk,hb2t,hb2td);
  delaypipe #(1,3)  dph (ioclk,hrdys,hrdyt);
  wire mode = hpchn[0];
  always @(posedge ioclk) begin
    ha2t <= (route[`HA2TA] || route[`HA2TB]);
    hb2t <= (route[`HB2TA] || route[`HB2TB]);
    ha2x <= (route[`HA2TA] || route[`HA2TB] || route[`HA2CA] || route[`HA2CB] || route[`HA2R] || hx2nio);
    hb2x <= (route[`HB2TA] || route[`HB2TB] || route[`HB2CA] || route[`HB2CB] || route[`HB2R] || hx2nio);
    rha  <= rst || (ha2t^ha2td);
    rhb  <= rst || (hb2t^hb2td);
    iholdc  <= !mode && !hrdys[0] && !hrdyt[0];
    iholda  <= ha2x  && !hrdys[1] && !hrdyt[1];
    iholdb  <= hb2x  && !hrdys[2] && !hrdyt[2];
  end
  assign hihold = {iholdb,iholda,iholdc};
  assign hohold = hold;
  wire [2:0] hrios = {rhb,rha,rst};
`endif

  // output enables
  // Every enable is cleared by rst and is otherwise only reloaded while ostart is
  // high, so it holds its value between ostart pulses.
  // The (ro_act && ro_sel==N) term selects one port by code; the o*_act/o*_sel
  // terms enable the A side when the select is low and the B side when it is high.
  always @ (posedge ioclk) begin
    // port 0 only when nio2m is set and ostat[`PR] is low, or on an oni_act request
    if (rst) oenan  <= 0; else if (ostart) oenan  <= (ro_act && ro_sel==0 && nio2m && !ostat[`PR]) || oni_act;
    // oni_act takes precedence over onv_act
    if (rst) oenanv <= 0; else if (ostart) oenanv <= onv_act && !oni_act;
    if (rst) oenah  <= 0; else if (ostart) oenah  <= (ro_act && ro_sel==1) || (oph_act &&  oph_sel);
    // HA is suppressed while bsel is high; HB requires bsel whenever bmux is set
    if (rst) oenaha <= 0; else if (ostart) oenaha <=((ro_act && ro_sel==2) || (ohx_act && !ohx_sel))        && !bsel;
    if (rst) oenahb <= 0; else if (ostart) oenahb <=((ro_act && ro_sel==hm)|| (ohx_act && (ohx_sel|hxmix))) && (!bmux||bsel);
    if (rst) oenaca <= 0; else if (ostart) oenaca <= (ro_act && ro_sel==4) || (ocx_act && !ocx_sel)        || oca_act;
    if (rst) oenacb <= 0; else if (ostart) oenacb <= (ro_act && ro_sel==cm)|| (ocx_act && (ocx_sel|cxmix)) || ocb_act;
    if (rst) oenata <= 0; else if (ostart) oenata <= (ro_act && ro_sel==6) || (otx_act && !otx_sel);
    if (rst) oenatb <= 0; else if (ostart) oenatb <= (ro_act && ro_sel==7) || (otx_act &&  otx_sel);
    if (rst) oenahc <= 0; else if (ostart) oenahc <= inv_act;
  end

  // lock MUX controllers
  // The output-side selects (omux*) are reloaded only while setomux is high and the
  // input-side selects (imux*) only while setimux is high; none of them has a reset term.
  reg omuxcx,omuxhx,omuxtx;
  reg [1:0] imuxm,omuxph,imuxph,imuxcx,imuxhx,imuxtx;
  always @ (posedge ioclk) begin
   if (setomux) begin
    // priority: oni_act (2), onv_act (3), NIO-to-memory on port 0 (2), else ro_sel[0]
    omuxph <= oni_act? 2 : onv_act? 3 : (nio2m && ro_sel==0)? 2 : ro_sel[0];
    omuxcx <=              ocx_act? ocx_sel : ro_sel[0];
    // bmux forces the block select bsel ahead of the normal HX choice
    omuxhx <= bmux? bsel : ohx_act? ohx_sel : ro_sel[0];
    omuxtx <=              otx_act? otx_sel : ro_sel[0];
   end
   if (setimux) begin
    imuxm  <= ro_sel[2:1];
    // MB picks between code 2 and code 1 - presumably a build/mode constant, declared out of view
    imuxph <= (inv_act||ini_act)? (MB?2:1) : 0;
    // the *_act2 request wins over *_act; 0 when neither is active
    imuxcx <= icx_act2? 2 : icx_act? 1 : 0;
    imuxtx <= itx_act2? 2 : itx_act? 1 : 0;
    imuxhx <= (onv_act|oni_act)? 3 : ihx_act2? 2 : ihx_act? 1 : 0;
   end
  end
  // PH input select is overridden to 3 while dio2p and ienad are both set
  wire [1:0] jmuxph = (dio2p && ienad)? 3 : imuxph;

  // output bus mixers
  wire [IBW-1:0] obuscx,obusph,obushx,obustx;
  mux4xNp #(IBW) moph (obusph, obusp,obush,obusn,obusnv, omuxph,ioclk);
`ifdef DMA_QMUX
  reg ovalhx; always @(posedge ioclk) ovalhx <= oenaha || oenahb;
  reg oval; always @(posedge ioclk) oval <= ovalhx && !oval;	// iomodule only reads on every other cycle
  cmix2xNp #(IBW) mohx (obushx, obusha,obushb, omuxhx,ioclk, qmux,oval);
`else
  mix2xNp #(IBW) mohx (obushx, obusha,obushb, omuxhx,ioclk, hxmix&(!bmux),{L,hxfmt});
`endif
  mix2xNp #(IBW) mocx (obuscx, obusca,obuscb, omuxcx,ioclk, cxmix, {H,L});
  mux2xNp #(IBW) motx (obustx, obusta,obustb, omuxtx,ioclk);
  mux4xNp #(IBW) mim  (obusm,  obusph,obushx,obuscx,obustx, imuxm,ioclk);
  // ibusm has 1 cycle delay to match delay of other muxes

  // input bus mixers 
  wire [IBW-1:0] ibuscx,ibusph,ibushx,ibustx;
  mux4xNp #(IBW) miph (ibusph, jbusm,obushx,obustx,obusm,  jmuxph,ioclk);
  mux4xNp #(IBW) mihx (ibushx, jbusm,obustx,obuscx,obusph, imuxhx,ioclk);
  mux3xNp #(IBW) micx (ibuscx, jbusm,obushx,obustx,        imuxcx,ioclk);
  mux3xNp #(IBW) mitx (ibustx, jbusm,obushx,obuscx,        imuxtx,ioclk);

  assign ibusp  = ibusph;
  assign ibush  = ibusph;

  // hx input source (TX|CX|Ram) (top 2|3b vote, bott 3b vote)
  reg [7:0] iselhxs;
`ifdef ICE_MB
  always @ (posedge ioclk) if (istart) iselhxs <= ihx_act? ihx_sel : ihx_act2? ihx_sel2 : ri_sel[0];
  wire [7:0] iselhx = iselhxs;
  assign iselha = {biha,3'b0,iselhxs[3:0]};	// pass bififo mode bit
  assign iselhb = {bihb,3'b0,iselhxs[3:0]};
`else						// lots of choices as all IO to xBar uses this path
  always @ (posedge ioclk) if (istart) iselhxs <= 
	ihx_act?  {2'b10, otx_sel?oseltb[4:0]:oselta[4:0],ihx_sel} :	// tx to hx
	ihx_act2? {5'b00000,{3{ihx_sel2}}} : 				// cx to hx
	oni_act?  {5'b11010,oseln[6:4]} : 				// nio to hx
	onv_act?  {7'b1100000,ri_sel[0]} : 				// nv to hx
		  {7'b1000000,ri_sel[0]};				// ram to hx
  wire [7:0] iselhx = tx2hx? {7'b1000000,ienahb} : cx2hx? {5'b00000,{3{ienahb}}} : iselhxs;
  assign iselha = iselhx;
  assign iselhb = iselhx;
`endif
  assign ibusha = ibushx;
  assign ibushb = ibushx;

  // cx input source
  // Two-step latch of the CX input select byte:
  //   istart  - iselcxs remembers which side (A=0/B=1) is the source: the HX select
  //             when icx_act is set, otherwise the TX select.
  //   istartd - iselcx copies the osel byte of that source port (oselha/oselhb when
  //             icx_act is set, otherwise oselta/oseltb).
  reg iselcxs;
  reg [7:0] iselcx;
  always @ (posedge ioclk) begin
    if (istart)  iselcxs <= icx_act? ohx_sel : otx_sel;
    if (istartd) iselcx  <= icx_act? (iselcxs?oselhb:oselha) : (iselcxs? oseltb:oselta);
  end
`ifdef DMA_CX2CX
  // alternate mix registers
  // CA/CB can be fed from the opposite CX output bus, selected by cb2ca / ca2cb;
  // the enables are then registered copies of the opposite side's output enable.
  // NOTE(review): mux2xNp port order (out, in0, in1, sel, clk) is assumed from its use here - confirm.
  mux2xNp #(IBW) mca (ibusca, ibuscx,obuscb, cb2ca,ioclk);
  mux2xNp #(IBW) mcb (ibuscb, ibuscx,obusca, ca2cb,ioclk);
  reg jenaca; always @(posedge ioclk) jenaca <= cb2ca? oenacb : ienaca;
  reg jenacb; always @(posedge ioclk) jenacb <= ca2cb? oenaca : ienacb;
`else
  // no CX-to-CX path: enables and data pass straight through
  assign jenaca = ienaca;
  assign jenacb = ienacb;
  assign ibusca = ibuscx;
  assign ibuscb = ibuscx;
`endif
  // both CX ports receive the same select byte
  assign iselca = iselcx;
  assign iselcb = iselcx;

  // tx input source
  reg iseltxs;
  reg [7:0] iseltx;
  wire itabr = ri_act && (ri_sel[2:1]==3);	// assumes sending NVME packets to PM
  wire [7:0] iseltx_mb = itabr? {7'b1100000,ri_sel[0]} : {2'b00,oselhb[1],oselha[1], L, itx_actx?2'h0:iseltxs?oselhb[1:0]:oselha[1:0], iseltxs};
  wire [7:0] iseltx_pm = itx_act? {5'd0,ohx_sel?oselhb[1:0]:oselha[1:0],iseltxs} : 8'd0;
  always @ (posedge ioclk) begin
    if (istart)  iseltxs <= (MB && itx_act2)? ocx_sel : ohx_sel;
    if (istartd) iseltx <= MB? iseltx_mb : iseltx_pm;
  end

`ifdef DMA_TX2TX
  // alternate mix registers
  mux2xNp #(IBW) mota (ibusta, ibustx,obustb, tb2ta,ioclk);
  mux2xNp #(IBW) motb (ibustb, ibustx,obusta, ta2tb,ioclk);
  reg jenata; always @(posedge ioclk) jenata <= tb2ta? oenatb : ienata;
  reg jenatb; always @(posedge ioclk) jenatb <= ta2tb? oenata : ienatb;
  reg [7:0] iselta; always @(posedge ioclk) iselta <= tb2ta? otx_selc : iseltx;
  reg [7:0] iseltb; always @(posedge ioclk) iseltb <= ta2tb? otx_selc : iseltx;
`else
  assign jenata = ienata;
  assign jenatb = ienatb;
  assign ibusta = ibustx;
  assign ibustb = ibustx;
`ifdef DMA_TICS
  ictxsel #(`MC_ENGINE3S) itxa (ioclk,oselta,ictrdya, ictqueva,ictquexa, ictselva,ictselxa, cnta,stata);
  ictxsel #(`MC_ENGINE4S) itxb (ioclk,oseltb,ictrdyb, ictquevb,ictquexb, ictselvb,ictselxb, cntb,statb);
  reg [3:0] ictselx; always @(posedge ioclk) if (istartd) ictselx <= ienatb? ictselxb:ictselxa;
  assign iselta = ictena? {ictselx,4'h0}:iseltx;
  assign iseltb = ictenb? {ictselx,4'h0}:iseltx;
`else
  assign iselta = iseltx;
  assign iseltb = iseltx;
`endif
`endif

  // RAM DMA register access
  // Classifies the current DMA cycle at cp, then builds the 12-bit register-RAM
  // address daddr and the raddr/iaddr/waddr strobes that follow c1.
  wire dma_done,dio_done;
  reg dma_ocyc,dma_icyc,dma_ccyc,dma_tcyc,dma_acyc,dma_ncyc,dma_kcyc,dma_raddr,dma_iaddr,dma_waddr,rdwrd,rs0;  
  // rs0 is a registered copy of ro_sel[0]; it picks the B-side (1) or A-side (0) latched osel value
  reg [4:0] ti_sel;              wire            qi0     = qi_sel[0];
  reg [7:0] oselca_,oselcb_;     wire [7:0]      oselcx_ = rs0? oselcb_ : oselca_;
  reg [TMC-1:0] oselta_,oseltb_; wire [TMC-1:0]  oseltx_ = rs0? oseltb_ : oselta_;
  // up to 4k mcore channels and 512 core channels
  wire [11:0] to_sel = MB? {6'b0,rs0?{H,oseltb_[4:0]}:oselta_[5:0]} : PC? {oseltx_[5:0],rs0} : {oseltx_,rs0};
  wire [8:0]  co_sel = MB? {3'b0,rs0?{H,oselcb_[4:0]}:oselca_[5:0]} : PC? {oselcx_[5:0],rs0} : {oselcx_,rs0};
  wire [2:0]  no_sel = nio2m? oseln[6:4] : {rs0,oseltx_[1:0]};
  // the concatenation is wider than 3 bits; the assignment keeps its low 3 bits
  wire [2:0]  ao_sel = {oseltx_,rs0};

  // rdwr - H=fromRAM L=toRAM - high first half of cycle (opposite at cp)
  wire rdwr = (IBW>=64)? !count[2] : !count[3];
  wire cp   = (IBW>=64)? (count[1:0]==2) : (count[2:0]==6);
  // c0..c3 are cp delayed by 1..4 clocks
  reg c0,c1,c2,c3,da0,da5,ondi,ondo;
  reg [11:0] daddr;
  wire c2t = (MB && dma_ocyc && !dma_ncyc && !dma_acyc && ro_sel>=6)? to_sel[0] : c2;	// special TX ports
  always @ (posedge ioclk) begin
    rs0 <= ro_sel[0];
    if (cp)     ti_sel  <= {qi0?ictquexb[3:0]:ictquexa[3:0],qi0};
    // snapshot the port select bytes once per istart
    if (istart) oselca_ <= oselca;
    if (istart) oselcb_ <= oselcb;
    if (istart) oselta_ <= oselta;
    if (istart) oseltb_ <= oseltb;
    c0 <= cp; c1 <= c0; c2 <= c1; c3 <= c2;
    // cycle-type flags are re-evaluated only at cp and held until the next cp
    if (cp) begin
      ondi     <= !rdwr && qi_act && onden[qi_sel]; 		// on demand input
      ondo     <=  rdwr && ro_act && onden[ro_sel]; 		// on demand output
      dma_icyc <= !rdwr && qi_act; 				// coming from RAM
      dma_ocyc <=  rdwr && ro_act;				// going to RAM
      dma_kcyc <= !rdwr && qi_act && (qi_sel[2:1]==3) && (qi0? ictenb:ictena); // coming from RAM to TX Mixer
      dma_acyc <=  rdwr && ro_act && (ro_sel[2:1]==3) && (oseltx_[7:6]==3 && !oseltx_[4] && MB);// NVME
      dma_tcyc <=  rdwr && ro_act && (ro_sel[2:1]==3) && (oseltx_[7:6]==2 || PM || PC);
      dma_ccyc <=  rdwr && ro_act && (ro_sel[2:1]==2) && (oselcx_[7:6]==2 && tx2cx);
      dma_ncyc <=  rdwr && ro_act && (nio2m? (ro_sel==0)  	// uses upper addr for NIO on MB
				   : (ro_sel[2:1]==3) && (oseltx_[7:6]==3) && oseltx_[4] );	// for NIO on PMs
    end
    // address strobes: raddr one clock after c1 of an active cycle, then iaddr, then waddr
    dma_raddr <= c1 && (dma_icyc || dma_ocyc || dma_tcyc);
    dma_iaddr <= dma_raddr;
    dma_waddr <= dma_iaddr;
    // daddr priority: tcyc, ccyc, (kcyc, ncyc under ICE_K8), acyc, ocyc, icyc; held when none is set
         if (dma_tcyc) daddr <= {       to_sel   };	// 0xFFF:0x000 TX to RAM
    else if (dma_ccyc) daddr <= { 3'h0 ,co_sel   };	// 0x1FF:0x000 CX to RAM
`ifdef ICE_K8
    else if (dma_kcyc) daddr <= { 6'h3E,ti_sel,L };	// 0xFBF:0xF80 RAM to TX (6'h3E prefix; earlier note said 0xF0F:0xF00)
    else if (dma_ncyc) daddr <= { 8'hFF,no_sel,L };	// 0xFFF:0xFF0 NIO to RAM
`endif
    else if (dma_acyc) daddr <= { 8'hFE,ao_sel,L };	// 0xFEF:0xFE0 NVM to RAM
    else if (dma_ocyc) daddr <= { 8'hFD,ro_sel,L };	// 0xFDF:0xFD0 XX to RAM
    else if (dma_icyc) daddr <= { 8'hFC,qi_sel,L };	// 0xFCF:0xFC0 RAM to XX
    if (dma_iaddr) ro_sel_ <= ro_sel;			// dma_done occurs in next cycle
    if (dma_iaddr) rdwrd <= rdwr;
    // da0/da5 are registered modifiers for address bits 0 and 5 of daddr
    da0 <= (dma_tcyc|dma_ccyc)? L : c2t;
    da5 <= dma_ncyc?  H : L;				
  end
  // queue-valid strobes: one per side, at c0 of a RAM-to-TX-mixer cycle
  assign ictqueva = dma_kcyc && c0 && !qi0;
  assign ictquevb = dma_kcyc && c0 &&  qi0;
  // select-valid strobes: TX input enable qualified by istartd
  assign ictselva = ienata && istartd;
  assign ictselvb = ienatb && istartd;

`ifdef TRACE_DBG_DMAC
  assign test[32]=ioclk;
  wire kstat = istat[`TA];
  wire kstax = istax[`TA];
  wire [3:0] enas = oselta[11:8];
  wire [2:0] kchn = oselta[14:12];
  wire ienc = oselta[15];
  wire irdya = oselta[16];
  assign test[31:0]={cnta[2:0], kchn[2:0],oenata, ienc,irdya, ictselx[2:0],ienata,qrdym[6],stata, ti_sel[3:1],dma_kcyc,qirdyta, oselta[7:0]};
`endif

  // cp 		mode=T	mode=0	mode=DIO
  // c0	 dma_xcyc 
  // c1  daddr		X	X	Y
  // c2  raddr load	X	X	Y+1
  // c3  iaddr		X	X+1	Y+2
  //     waddr		X	X	Y
  //
  wire [31:0] brdbus1,brdbus2;
  wire [31:0] dma_wbus,dma_rbus; 
  wire [31:0] dio_wbus,dio_rbus; 
  wire bdio = baddr[14];

  // to/from RAM IO bus - the upper DSOCM up to 16K block
  pipeaddr #(RAB,IBW) padr (ioclk,L,dma_raddr,dma_iaddr,dma_waddr,dma_rbus,dma_wbus,dma_done);
  wire bwrdma = bwr & !bdio;
  wire [11:0] daddr_ = {daddr[11:6],daddr[5]|da5,daddr[4:1],daddr[0]^da0};
  dpram #(`DMA_NR,32,32) ram (ioclk,H,  daddr_,dma_waddr, dma_wbus, dma_rbus, 
				bclk,brd, baddr[13:2],bwrdma, bwrbus, brdbus1);

`ifdef DMA_DIO
  // direct PCIe IO bus - the upper DSOCM 2K block for direct PCI addressing
  pipeaddr #(RAB,IBW) hadr (ioclk,H,dma_raddr,dma_iaddr,dma_waddr,dio_rbus,dio_wbus,dio_done);
  wire bwrdio = bwr & bdio;
  wire [8:0] haddr_ = {daddr[6],daddr[5]|da5,daddr[4:0],c3,c2};
  dpram #(2,32,32) ramd (ioclk,H, haddr_,dma_waddr, dio_wbus, dio_rbus, 
			bclk,brd, baddr[10:2],bwrdio, bwrbus, brdbus2);
  always @(posedge ioclk) begin
    if (dma_ocyc && c2) iadrd <= dio_rbus;
    if (rst) ienad <= 0; else if (istartd) ienad <= ro_act && dio2p_;
  end
  assign brdbus = bdio? brdbus2 : brdbus1;
`else
  assign brdbus = brdbus1;
`endif

`ifdef DMA_NIO
  // network IO bus
  reg [1:0] iselno; reg [2:0] iselnx;
  wire [2:0] iselnt = otx_sel? oseltb[2:0] : oselta[2:0];
  wire [2:0] iselns; mux4xN #(3) m4 (iselns,  iselnt,{obushx[32],obushx[16],obushx[0]}, 
				{obuscx[32],obuscx[16],obuscx[0]},{obustx[32],obustx[16],obustx[0]}, iselno);
  wire [7:0] iseln_; cram64xNs #(8) cni (iseln_, swrbus, rnio?swrbus[13:8]:{iselnx,iselns}, ioclk,rnio);
  always @(posedge ioclk) begin 	// QSFP 1-4 sub net 1-16 for ease of timing this is not 1 cyc ahead of ibusn
    iselnx <= hx2nio? {L,H,ohx_sel} : ro_sel;
    iselno <= hx2nio? 1 : (ro_sel[2:1]==3 && !txtag)? 0 : ro_sel[2:1];
    if (rst) jenan <= 0; else if (istarte) jenan <= (ro_act && m2nio && (ro_sel!=0)) || ini_act; 
    if (rst) iseln <= 0; else if (istarte) iseln <= iseln_;
    ibusn <= hx2nio? ibusph : obusm;
  end
  assign ienan = jenan && (iseln[7:4]!=0);	// help timing
`endif

`ifdef DMA_NVM
  // NVME IO bus
  // iselnv latches oselha on istart.  iselhcu is loaded with 0 on both arms of the
  // conditional (the oselnv-based source is commented out), so the upper byte of
  // iselhc is always 0 once istart has occurred.
  reg [7:0] iselhcu;
  always @(posedge ioclk) begin
    if (istart) iselnv <= oselha;
    //if (istart) iselhcu <= oenanv? oselnv[15:8] : 0;
    if (istart) iselhcu <= oenanv? 0 : 0;
  end
  // NVME input data is the PH input bus; its select is {iselhcu, iselhx}
  assign ibusnv = ibusph;
  assign iselhc = {iselhcu,iselhx};
`endif

  // Transfer-complete detection and route clear pulses.
  // The done flag comes from dio_done when p2dio (dio2p) is set, otherwise from
  // dma_done.  rdwrd is the latched rdwr for this access: high selects idone,
  // low selects odone.
  wire idone = (p2dio? dio_done:dma_done) &&  rdwrd;
  wire odone = (dio2p? dio_done:dma_done) && !rdwrd;
  // Each croute bit is reassigned every clock with no hold term, so it is high only
  // for clocks where its condition is true: either a register write (rclr with the
  // matching swrbus bit) or completion on the latched port ro_sel_.
  always @ (posedge ioclk) begin
    croute[`PR2R] <= (rclr && swrbus[`PR2R]) || (odone && ro_sel_==0);
    croute[`HP2R] <= (rclr && swrbus[`HP2R]) || (odone && ro_sel_==1);
    croute[`HA2R] <= (rclr && swrbus[`HA2R]) || (odone && ro_sel_==2);
    croute[`HB2R] <= (rclr && swrbus[`HB2R]) || (odone && ro_sel_==3);
    croute[`CA2R] <= (rclr && swrbus[`CA2R]) || (odone && ro_sel_==4);
    croute[`CB2R] <= (rclr && swrbus[`CB2R]) || (odone && ro_sel_==5);
    // the trailing "&& L" gates off the completion term for TA/TB
    // (assuming L is the constant-0 wire, as in the other modules of this file)
    croute[`TA2R] <= (rclr && swrbus[`TA2R]) || (odone && ro_sel_==6 && L); 
    croute[`TB2R] <= (rclr && swrbus[`TB2R]) || (odone && ro_sel_==7 && L); 
    // oclear is not raised for port 0
    oclear <= odone && (ro_sel_!=0);
  end
  // Input-side routes: completion only clears a route whose on-demand enable
  // onden[n] is low.  These compare the live qi_sel rather than a latched copy.
  always @ (posedge ioclk) begin
    croute[`R2PR] <= (rclr && swrbus[`R2PR]) || (idone && qi_sel==0 && !onden[0]);
    croute[`R2HP] <= (rclr && swrbus[`R2HP]) || (idone && qi_sel==1 && !onden[1]);
    croute[`R2HA] <= (rclr && swrbus[`R2HA]) || (idone && qi_sel==2 && !onden[2]);
    croute[`R2HB] <= (rclr && swrbus[`R2HB]) || (idone && qi_sel==3 && !onden[3]);
    croute[`R2CA] <= (rclr && swrbus[`R2CA]) || (idone && qi_sel==4 && !onden[4]);
    croute[`R2CB] <= (rclr && swrbus[`R2CB]) || (idone && qi_sel==5 && !onden[5]);
    croute[`R2TA] <= (rclr && swrbus[`R2TA]) || (idone && qi_sel==6 && !onden[6]);
    croute[`R2TB] <= (rclr && swrbus[`R2TB]) || (idone && qi_sel==7 && !onden[7]);
    // iclear is not raised for port 0 or for on-demand ports
    iclear <= idone && (qi_sel!=0) && !onden[qi_sel];
  end

  // On-demand OK flags, one per port 0..7.
  // Priority per bit: forced low while onden[i] is low; else set by ondup[i];
  // else cleared when an input transfer completes (idone) on port i.
  // NOTE(review): the generate-for block is unnamed, which Verilog-2001 tools may reject.
  genvar i;
  generate
  for (i=0; i<8; i=i+1) begin
   always @ (posedge ioclk) begin
           if (!onden[i]) ondok[i] <= 0; 
      else if ( ondup[i]) ondok[i] <= 1; 
      else if (idone && qi_sel==i) ondok[i] <= 0; 
//      else if (odone && ro_sel==1 && ondup[i]) ondok[i] <= 1; 
   end
  end
  endgenerate


  // RAM DMA burst handler
  // Builds the 36-bit command word acmd and its one-clock-later go strobe ago.
  //   ado  - registered "issue a command" flag, evaluated at c1: on the rdwr-high
  //          half it needs qi_act, on the rdwr-low half ro_act without dio2p_.
  //   adop - registered "port 0" flag (for the ro side also requires !nio2m);
  //          selects the addrp/sizep command form instead of the dma_rbus form.
  reg ado,adop,ago,apd;
  reg [35:0] acmd;
  wire busyp = L; 	// other PPC signals
  wire [1:0]  sized = (IBW>=128)? 2 : 3;	// DMA size
  always @ (posedge ioclk) begin
    ado  <= rdwr? (c1 && qi_act) : (c1 && ro_act && !dio2p_);
    adop <= rdwr? (qi_sel==0) : (ro_sel==0 && !nio2m);
    // command = {rdwr, 0, size, address}; the non-port-0 form takes address bits
    // 30:6 from dma_rbus and appends L6 as the low bits
    if (ado) acmd <= adop? {rdwr,L,sizep, addrp[31:0]} 
                   :       {rdwr,L,sized, L,dma_rbus[30:6],L6};
    // ago trails ado by one clock so it lines up with the loaded acmd
    if (rst) ago <= 0; else ago <= ado; 
    apd <= okp;
  end

  // queues for prefetch addresses
  assign qselm = {route[31:25],qi_sel,ro_sel,ri_sel};
  assign irdym = ri_act && ostarx;
  assign ordym = ro_act && istart;

`ifndef TRACE_DBG_DMAC
//  assign test = {ienaha,oenaha,ienahb,oenahb, ienah,oenah,istat[`HA],ostat[`HP]};
//  assign test = {irdyh,route[`HA2HP],route[`HB2HP],oenahb, ienah,oenah,iph_act,ohx_act};
//  assign test = {ro_act,lock3,ienap,oenap, rcyc,mcyc,bcyc,cp};
//  assign test = {ri_act,ro_act,idone,odone, ienam,oenam,dma_waddr,ado};
//  assign test = {istat[`PR],ostat[`PR], ienap,oenap, ienam,oenam, irdym,ordym};
//  assign test = {route[`TB2R],route[`TA2R],croute[`TB2R],croute[`TA2R], rclr,rset,rflg,rrst };
//  assign test = {ri_act,ro_act,ago,wreg, rclr,rset,rflg,rrst };
//  assign test = {ienata,oenata,ienatb,oenatb, afull,rxt,route[`R2HP],ostat[`TA]};
//  assign test = {ienaha,oenaha, ienah,oenah, hpfdp,hprst, hxfmt,hxmix};
//  assign test = {hprst,istart,oenata,oenaha,ienah,ctest[3],afull,ienad};
//  assign test = {ienata,oenata, oenahb,oenaha, route[`R2HP],afull, ienam,oenam};
  assign test = {ienata,oenata, oenahb,oenaha, tauz|tbuz,taux|tbux, tbux,taux};
//  assign test = ctest;
//  assign test = {oenaha, haod,haow, hbod,afull, ostat[`HA],istat[`CA],istat[`TA]};
//  assign test = 0;
`endif
  
endmodule


module ictxsel (clk,ostat,ordy, ival,isel, oval,osel, cnt,stat);
  parameter NC=8;		// number of channels walked by the selector
  localparam MC=NC-1;		// index of the last channel

  // ports
  input             clk;
  input      [17:0] ostat;	// [17]=enable, [16]=ready (unused here), [MC:0]=per-channel flags
  input             ival,oval;	// queue push / pop strobes
  output reg        ordy;
  output reg  [3:0] isel;	// channel currently being visited
  output      [3:0] osel;	// channel popped from the queue
  output reg  [2:0] cnt;	// count 8x256 for 2K memory optimization
  output            stat;

  // field extraction from the status word
  wire        enable  = ostat[17];
  wire        rdy     = ostat[16];
  wire [MC:0] flags_i = ostat[MC:0];

  // istat/jstat: two-stage sample of the channel flags
  // kstat: per-channel bit toggled once for each group of ival strobes
  reg [MC:0] istat,jstat,kstat;
  reg rst;

  // the visited channel is idle when its toggle bit equals its sampled flag
  wire       idle    = (kstat[isel]==jstat[isel]);
  wire       at_last = (isel==MC);
  wire [3:0] jsel    = at_last? 4'd0 : isel+4'd1;	// round-robin successor

  // move on when idle between groups, or on the ival that ends a group
  wire next = ((cnt==0) & idle) || ((cnt==7) && ival);

  // flag sampling pipeline
  always @(posedge clk) begin
    istat <= flags_i;
    jstat <= istat;
  end

  // local reset is the registered inverse of the enable bit
  always @(posedge clk) rst <= !enable;

  // sequencing state
  always @(posedge clk) begin
    if (rst) begin
      cnt   <= 0;
      isel  <= 0;
      kstat <= 0;
    end
    else begin
      if (ival) cnt  <= cnt+1;
      if (next) isel <= jsel;
      if (ival && (cnt==4)) kstat <= kstat ^ (1<<isel);
    end
    // ready while inside a group or while the visited channel is not idle
    ordy <= (cnt!=0) || !idle;
  end

  // queue of visited channels: pushed on ival, popped on oval
  queMxN #(5,4,1) que (clk,rst, ival,oval, isel,osel, stat);

endmodule


/*
  Specialty counter for power2 circular buffer addressing 
  Mode=1 is for HOST addressing (x64), Mode=0 for CARD addressing (x1)
  Mode=0 can wrap and stop but cannot handle nonPower2 buffers
  Mode=1 can wrap but not stop anywhere except the bottom
  Mask is the lower 5 bits in CARD mode:
    mask[4:0] = 00  4K
    mask[4:0] = 0E  64M
    mask[4:0] = 0F  128M
    mask[4:0] = 10  256M
    mask[4:0] = 12  1G
    mask[4:0] = 13  2G
*/
module pipeaddr (clk,mode,load,inc,write,d,q,finish);

  // Overridable parameters - declaration order matters for positional #(MMSB,IBW) overrides
  parameter MMSB=26;		// lowest bit of the 'upper' address field
  parameter IBW=64;		// internal bus width, selects the count step

  // Field boundaries
  localparam MLSB=6;			// lowest bit of the 'cnt' field
  localparam MOFF=12;			// lowest bit that the wrap mask can freeze
  localparam MCNT=MMSB-MOFF;		// number of maskable bits
  localparam MXSB=MLSB+7;		// compare slice boundaries
  localparam MYSB=MXSB+7;
  localparam MHSB=(MMSB+MLSB)/2;	// split point for the carry flags
  localparam CINC=(IBW==256)? 4 : (IBW==128)? 2 : 1;

  input         clk,mode,load,inc,write;
  input  [31:0] d;
  output [31:0] q;
  output        finish;

  wire L=0, H=1;

  // address is held as three fields: {upper, cnt, lower}
  reg [31:MMSB]     upper;
  reg [MMSB-1:MLSB] cnt;
  reg [MLSB-1:0]    lower;
  reg [4:0]         mask;	// buffer size code, taken from d[4:0] at load
  reg [4:0]         match;	// registered per-slice compare of d against the address
  reg               emask;	// wrap masking active (loaded with !mode)
  reg               lowf,midf,mudf;	// "field is at its top value" flags, sampled at load
  reg               ok2reload,ok2finish;
  wire              reload;

  // step size and the top-of-field constants used for the carry flags
  wire [2:0]         step    = mode? 1 : CINC;
  wire [MLSB-1:0]    low_top = ~(CINC-1);
  wire [MHSB-1:MLSB] mid_top = ~0;
  wire [MMSB-1:MHSB] mud_top = ~0;

  // next count: plain increment below MOFF, and above it each bit at or beyond
  // the mask position is frozen so the count wraps inside a power-of-2 buffer
  wire [MMSB-1:MLSB] cnt_inc = cnt+step;
  wire [MMSB-1:MLSB] cnt_nxt;
  assign cnt_nxt[MOFF-1:MLSB] = cnt_inc[MOFF-1:MLSB];

  genvar i;
  generate
  for (i=0; i<MCNT; i=i+1) begin:pcnt
    wire hold = emask && (mask<=i);
    assign cnt_nxt[i+MOFF] = hold? cnt[i+MOFF] : cnt_inc[i+MOFF];
  end
  endgenerate

  // per-field increment enables
  //   mode=1: lower always steps, cnt steps on lower carry, upper on full carry
  //   mode=0: only cnt steps
  wire incl = mode? inc                : L;
  wire incm = mode? inc&lowf           : inc;
  wire incu = mode? inc&lowf&midf&mudf : L;

  // address / control state: load has priority over increment
  always @ (posedge clk) begin
    if (load) begin
      upper <= d[31:MMSB];
      cnt   <= d[MMSB-1:MLSB];
      lower <= d[MLSB-1:0];
      mask  <= d[4:0];
      emask <= !mode;
      lowf  <= (d[MLSB-1:0]    == low_top);
      midf  <= (d[MHSB-1:MLSB] == mid_top);
      mudf  <= (d[MMSB-1:MHSB] == mud_top);
    end
    else begin
      if (incu) upper <= upper+1;
      if (incm) cnt   <= cnt_nxt;
      if (incl) lower <= lower+CINC;
    end
  end

  // registered qualifiers and slice compares for finish / reload
  wire uzero = d[31:MLSB]==0;
  always @ (posedge clk) begin
    ok2finish <= write && (mode? uzero : !upper[31]);	// power2 up to 256M
    ok2reload <= mode? inc : L;
  end
  always @ (posedge clk) begin
    match[4] <= (d[  32-1:MMSB]==upper);
    match[3] <= (d[MMSB-1:MYSB]==cnt[MMSB-1:MYSB]);
    match[2] <= (d[MYSB-1:MXSB]==cnt[MYSB-1:MXSB]);
    match[1] <= (d[MXSB-1:MLSB]==cnt[MXSB-1:MLSB]);
    match[0] <= (d[MLSB-1:   0]==lower);
  end

  // outputs
  wire [31:0] addr = {upper,cnt,lower};
`ifdef ICE_MB
  assign q = reload? d : addr;
`else
  assign q = addr;
`endif
  assign finish = ok2finish && (match[3:1]==3'b111);	// only the cnt slices take part
  assign reload = ok2reload && (match[4:0]==5'b11111);

endmodule

/*
  Simplified DMA controller for lightweight usage (no RAM, no CX, just HX and TX)

  Module output FIFOs have out registration for routing

 0  lock
 1  ostart
 2  ostartd	oenax
 3  istart
 4  istartd	ienax    odata
 5  istarte              idata
 6
 7
 0  lock
*/
module dmacontrollerLite (
  sclk,scs,saddr, srd,srdbus,swr,swrbus,		// System Control
  ioclk,rst,rsto, istat,ostat, 				// Block Status
  ienaha,ibusha,iselha,oenaha,obusha,oselha, 		// Hyper/IOM bus A
  ienahb,ibushb,iselhb,oenahb,obushb,oselhb,		// Hyper/IOM bus B
  ienata,ibusta,iselta,oenata,obusta,oselta, 		// Tuner/PM bus A
  ienatb,ibustb,iseltb,oenatb,obustb,oseltb,		// Tuner/PM bus B
  hypm,test); 

  parameter IBW=32;		// Internal Bus Width
  parameter MODE=1;		// Mode 0=MainBoard 1=ProcessorModule 2=Special

  // System control port.  sclk and srd are not referenced inside this module;
  // the write strobe is sampled directly on ioclk.
  input sclk,scs,srd,swr;
  input [31:0] saddr,swrbus;
  output [31:0] srdbus;		// reads back the route register

  input ioclk,rst;
  input [15:0] istat,ostat;	// per-port input / output status, indexed by the port macros
  output rsto;			// stretched reset request from a write to register 0

  // x2y naming: osel*/obus*/oena* come from a port's output side,
  // isel*/ibus*/iena* go to a port's input side
  input  [7:0] oselha,oselhb,oselta,oseltb;
  output [7:0] iselha,iselhb,iselta,iseltb;
  input  [IBW-1:0] obusha,obushb, obusta,obustb; 
  output [IBW-1:0] ibusha,ibushb, ibusta,ibustb; 
  output reg oenaha,oenahb, oenata,oenatb; 
  output reg ienaha,ienahb, ienata,ienatb; 

  output reg hypm;		// bit 0 of the last write to register 4
  output [7:0] test;

  wire L=0, H=1;

  // cycle counter
  // lock pulses once per 8 clocks (IBW==64) or once per 16 clocks (otherwise);
  // the start strobes are a delay chain: lock -> ostart -> ostartd -> istart -> istartd -> istarte
  reg [3:0] count;
  reg lock;
  reg ostart,istart;
  reg ostartd,istartd,istarte;
  always @ (posedge ioclk) begin
    if (rst) count <= 0;   else count  <= count+1;
    if (rst) lock <= 0;  else lock <= (IBW==64)? (count[2:0]==0) : (count[3:0]==0);
    ostart  <= lock;
    ostartd <= ostart;
    istart  <= ostartd;
    istartd <= istart;
    istarte <= istartd;
  end

  // route registers
  reg rrst,rflg,rset,rclr,rcnt,rsto;
  reg [31:0] route, flags;
  reg [2:0] rstcnt;
  // register write decode: address bit 15 set and bit 14 clear
  wire wreg = (swr && scs && saddr[15] && !saddr[14]);

  // route set/clear 
  // saddr[4:2] selects: 0=reset request, 1=flags, 2=route set, 3=route clear, 4=hypm
  always @ (posedge ioclk) begin
    rrst <= (wreg && saddr[4:2]==0);
    rflg <= (wreg && saddr[4:2]==1);
    rset <= (wreg && saddr[4:2]==2);
    rclr <= (wreg && saddr[4:2]==3);
    rcnt <= (wreg && saddr[4:2]==4);
    // rsto stays high until rstcnt reaches 7.
    // NOTE(review): rsto and rstcnt have no reset term here; presumably they rely on
    // register power-up state - confirm for simulation.
    rsto <= rrst || (rsto && rstcnt!=7);	// triggers rst
    if (!rsto) rstcnt <= 0; else rstcnt <= rstcnt+1;
    if (rcnt) hypm <= swrbus[0];
    if (rst) flags <= 0; else if (rflg) flags <= swrbus;
    // only the low 16 route bits are writable: rclr clears and rset sets the bits given in swrbus
    if (rst) route[15:0] <= 0;  else route[15:0] <= (route[15:0] & ~({16{rclr}} & swrbus[15:0])) | ({16{rset}} & swrbus[15:0]);
    // FIX: the upper half was never assigned, so srdbus[31:16] read back as x in
    // simulation.  Drive it to zero so the read-back value is fully defined.
    route[31:16] <= 16'h0000;
  end  

  assign srdbus = route;

  // Per-source arbitration terms, registered every clock:
  //   *od - the source has at least one route configured
  //   *ow - the source must wait: a routed destination's istat bit is low, the source
  //         is already enabled, or the source's own ostat bit is low
  reg haod,hbod,taod,tbod;
  reg haow,hbow,taow,tbow;
  always @(posedge ioclk) begin
    haod <= (route[`HA2TA] || route[`HA2TB]);
    hbod <= (route[`HB2TA] || route[`HB2TB]);
    haow <= (route[`HA2TA]&&!istat[`TA]) ||  (route[`HA2TB]&&!istat[`TB]) || oenaha || !ostat[`HA];
    hbow <= (route[`HB2TA]&&!istat[`TA]) ||  (route[`HB2TB]&&!istat[`TB]) || oenahb || !ostat[`HB];
    taod <= (route[`TA2HA]);
    tbod <= (route[`TB2HB]);
    taow <= (route[`TA2HA]&&!istat[`HA]) || oenata || !ostat[`TA];
    tbow <= (route[`TB2HB]&&!istat[`HB]) || oenatb || !ostat[`TB];
  end

  reg oselhx,oseltx;
  reg [7:0] iselhx,iseltx;
  reg ohx_act,otx_act,ohx_sel,otx_sel;
  always @(posedge ioclk) begin
    // lock: pick the source for this slot; the B side is selected whenever B is ready
    if (lock) ohx_sel  <= (hbod && !hbow);
    if (lock) ohx_act  <= (haod && !haow) || (hbod && !hbow);
    if (lock) otx_sel  <= (tbod && !tbow);
    if (lock) otx_act  <= (taod && !taow) || (tbod && !tbow);

    // istart: move the choice to the data mux selects
    if (istart) oselhx <= ohx_sel;
    if (istart) oseltx <= otx_sel;
    // istartd: build the select bytes handed to the destination ports
    if (istartd) iselhx <= {3'b111,otx_sel?oseltb[3:0]:oselta[3:0],otx_sel};
    if (istartd) iseltx <= {oselhb[1:0],oselha[1:0], L,ohx_sel?oselhb[1:0]:oselha[1:0],ohx_sel};

    // ostart: enable the chosen source
    if (ostart) oenaha <= ohx_act && !ohx_sel;
    if (ostart) oenahb <= ohx_act &&  ohx_sel;
    if (ostart) oenata <= otx_act && !otx_sel;
    if (ostart) oenatb <= otx_act &&  otx_sel;

    // istart: enable each destination that has a route from an enabled source
    if (istart) ienata <= (oenaha&&route[`HA2TA]) || (oenahb&&route[`HB2TA]);
    if (istart) ienatb <= (oenaha&&route[`HA2TB]) || (oenahb&&route[`HB2TB]);
    if (istart) ienaha <= (oenata&&route[`TA2HA]);
    if (istart) ienahb <= (oenatb&&route[`TB2HB]);
  end

  // A/B data muxes for each bus pair
  wire [IBW-1:0] obushx,obustx;
  mux2xNp #(IBW) motx (obustx, obusta,obustb, oseltx,ioclk);
  mux2xNp #(IBW) mohx (obushx, obusha,obushb, oselhx,ioclk);

  // both ports of a pair share one select byte
  assign iselha = iselhx;
  assign iselhb = iselhx;
  assign iselta = iseltx;
  assign iseltb = iseltx;

  // cross connection: HX inputs are fed from the TX mux and TX inputs from the HX mux
  assign ibusha = obustx;
  assign ibushb = obustx;
  assign ibusta = obushx;
  assign ibustb = obushx;

  //assign test = {ienahb,ienaha,oenahb,oenaha, ienatb,ienata,oenatb,oenata};
  assign test = {ostat[`HA],haow,istat[`TA],ostart, ienaha,oenaha, ienata,oenata};

endmodule


/*
  Cache/RAM/Bank Access Scheduler/Optimizer

  Sequencer walks through Bank numbers 0-7 for Write then Read
  Write channels are scattered to banks on ingest - yet the cache is filled linearly
  Read channels are kept as channel streams as they must stay contiguous

  InQueue:

  0  ago 
  1  agod iago qadr
  2  
  3
  4  ago
  5  agod oago qadr
  6 
  7

  OutQueue:

  0  acl0 qadr        prw,padrx,pstat
  1  aclp padr psloto prw,padrx,pstat
  2  aclq qadr qcmd   nobnk
  3  acl  padr        pstat
  4  acl0 qadr
  5  aclp padr psloto
  6  aclq qadr qcmd   nobnk
  7  acl  padr

  Queue RAM Organization:

  Write   Addr={wbnk,wbcnt}  Info=cacheId  Cmd=acmd
  Read    Addr={rchn,rbcnt}  Info=cacheId  Cmd=acmd

  Max throughput:
    DDR  clkr * 8 * 1280/(1280+32) * 16/(16+4)

*/
// scheduler
//
// Queues memory requests arriving on the ioclk side (acmd, strobed by ago),
// replays them on the aclk side as qcmd/qcl, generates the periodic refresh
// strobe, and produces the cache address counters for the DRAM-side
// (rclk/wclk) and IO-side (iclk/oclk) data paths.
//
// Clock domains used here: ioclk, aclk, sclk, rclk, wclk, iclk, oclk.
// Several signals are sampled across domains without explicit synchronizers
// (e.g. refreshing -> sclk, qadr -> aclk, prdy -> ioclk).
// NOTE(review): presumably these clocks are phase-related - confirm.
//
// Ports:
//   ioclk,rst   request-side clock and reset
//   ago,acmd    request strobe and 36-bit request word; acmd[35] (arw) selects
//               a read-channel slot (8-15) when 1, a write-bank slot (0-7) when 0;
//               acmd[31] is tested as the "acfg" flag
//   afull       registered wstat: some write slot has more than qmax entries pending
//   qrdym       {prdy[15:8], qrdy[15:8], trdy}
//   qselm       [15:9] -> rir[7:1], [8:6] -> qsel, [5:3] -> wsel, [2:0] -> rsel
//   iclk,istr,iena / oclk,ostr,oena   IO-side strobes and enables
//   ienax,oenax   iena/oena delayed by one iclk/oclk
//   iadrx,oadrx   IO-side cache address counters
//   aclk        command-side clock
//   qcl,qcmd    command strobe and command word toward the memory controller
//   refresh     refresh strobe (one aclk-registered pulse per refresh)
//   rclk,rack / wclk,wack   DRAM-side read/write acknowledge
//   renl,rcntl,wenl,wcntl   enables/counters registered on the rising edge
//   renu,rcntu,wenu,wcntu   the same values re-registered on the falling edge
//   sclk,srcfg  refresh tick clock and 16-bit refresh configuration
//   test        8-bit debug bus
module scheduler (
	ioclk,rst, ago,acmd,afull, qrdym,qselm, 
	iclk,istr,iena,ienax,iadrx, oclk,ostr,oena,oenax,oadrx,
	aclk,qcl,qcmd,refresh, rclk,rack,renl,rcntl,renu,rcntu, wclk,wack,wenl,wcntl,wenu,wcntu,
        sclk,srcfg,test); //,tdata); 

  parameter WIDE=1;		// 1:4by 2:>=8by    dma cycles 1=16 2=8
  parameter IBB=10;		// width of the IO-side address counters iadrx/oadrx
  parameter MBB=9;		// width of the DRAM-side address counters rcntl/rcntu/wcntl/wcntu
  parameter RACKDLY=8;		// first parameter of the bslot->rnext delaypipe (presumably its depth in aclk clocks)
  parameter WACKDLY=4;		// bslot->wnext delaypipe uses WACKDLY-1

  localparam ILB=5-WIDE;	// number of low (within-block) bits in iadrx/oadrx
  localparam MLB=4-WIDE;	// number of low (within-block) bits in rcntl/wcntl
  localparam CBW=7-WIDE;	// 6b:8*8 5b:4*8

`ifdef DMA_PKT_4X
  localparam BAB=8;	// define bank=256 address bit
`elsif DMA_PKT_2X
  localparam BAB=7;	// define bank=128 address bit
`else
  localparam BAB=6;	// define bank=64 address bit
`endif

  localparam CLKX=`SYS_CLKX;
  localparam CLKM=`SYS_CLKM;
  // delay applied to rrdy before it is allowed to raise trdy; one stage shorter when CLKX<=CLKM
  localparam RACKUDLY=(CLKX>CLKM)? RACKDLY : RACKDLY-1;

  input ioclk,rst,ago;
  input [35:0] acmd;
  output afull;

  output [23:0] qrdym;
  input  [15:0] qselm;

  input aclk;
  output qcl;
  output [35:0] qcmd;
  output refresh;

  input iclk,istr,iena,oclk,ostr,oena;
  output reg ienax,oenax;
  output reg [IBB-1:0] iadrx,oadrx;

  input rclk,rack,wclk,wack;
  output reg renl,renu,wenl,wenu;
  output reg [MBB-1:0] rcntl,rcntu,wcntl,wcntu;

  input sclk;
  input [15:0] srcfg;
  output [7:0] test;

  // constant helpers: single-bit high/low and zero vectors used as tie-offs
  wire H=1, L=0;
  wire [3:0] L4=0;
  wire [31:0] L32=0;
  // compile-time memory-type flags; exactly one of DDR/NDR is 1
`ifdef NDRAM
  wire DDR = 0;
  wire NDR = 1;
`else
  wire DDR = 1;
  wire NDR = 0;
`endif

  genvar i;

  // extend the reset pulse for zeroing of cRAMs
  // rstx goes high with rst and stays high until the 6-bit rstc, which counts
  // from 1 while rstx is high, wraps back to 0.  rstc[5:2] is used further down
  // as the sweep address for clearing the slot cRAMs.
  // halt is set by rst and cleared by the first "ready" request (ago with acmd[31]=0).
  reg ready,halt,rstx,rsty; 
  reg [5:0] rstc;
  always @(posedge ioclk) begin
    rstx <= rst || (rstx && rstc!=0);
    if (!rstx) rstc <= 1; else rstc <= rstc+1;
//    ready <= ago && (NDR? acmd[31] && acmd[35] : !acmd[31]); 		// !acfg for SDRAM, acfg && arw for NDRAM
    ready <= ago && !acmd[31]; 		// !acfg to start refresh
    halt  <= rst || (halt && !ready);
  end
  // rsty is rstx re-registered on aclk for the aclk-side flag registers
  always @(posedge aclk) rsty <= rstx;

  // refresh logic
  // sclk side: rcnt free-runs and is cleared the clock after it matches the
  // terminal value (rzero is a registered compare).  On each rzero tick
  // needrefresh is loaded with refena; it is cleared once refreshing is seen.
  // refena is low (refresh disabled) when srcfg[15:8]==8'hFF.
  reg [10:0] count;
  reg [3:0] cturn;
  reg rzero,refreshing,refresh,refena,needrefresh,needrefresha,czero,cset,cdlx,cdly,prw;
  wire ctrn = (cturn!=0);			// turnaround countdown still running
  wire cena = (cturn==0) && !cdly;		// count advances only when no turnaround/delay is pending
  reg [9:0] rcnt;
  always @(posedge sclk) begin
`ifdef SDRAM4
    rzero <= (rcnt==10'h30C);				// need 780 clocks @ 100 MHz for 7.8uS refresh tick
`else
    rzero <= (rcnt==10'h36D);				// need 780*36/32 clocks @ 100 MHz for old 7.8uS refresh tick
`endif
    if (rzero) rcnt <= 0; else rcnt <= rcnt+1;
    if (rzero) needrefresh <= refena; else if (refreshing) needrefresh <= L;
    refena <= (srcfg[15:8] != 8'hFF);
  end
  wire srcfgz = (srcfg==0);
  // aclk side: count is the cycle counter.  cset marks the end-of-cycle
  // count value (61 of 64 for NDR, 29 of 32 otherwise).  czero clears count on
  // rst or when a refresh is pending at cset, and starts "refreshing".
  // On cset cturn is loaded with a turnaround value and counts down to 0;
  // count is held while cturn!=0 or cdly is set.
  // The memory-type `ifdef chain below selects cdlx, the cturn load value and
  // the count value at which the refresh strobe fires.  If none of
  // NDRAM/SDRAM1..SDRAM4 is defined, cdlx, cturn and refresh are never assigned.
  // In the SDRAM1/SDRAM2 branches both arms of "prw? 4:4" are identical.
  always @(posedge aclk) begin
    cdly <= cdlx && !refreshing;
    cset <= NDR? (count[5:0]==61) : (count[4:0]==29);
    needrefresha <= needrefresh;
    czero <= rst || (needrefresha && cset);				// need 780 clocks @ 100 MHz
    if (czero) count <= 0; else if (cena) count <= count+1;
`ifdef NDRAM
    cdlx <= L;
    if (cset) cturn <= prw? 2:1; else if (ctrn) cturn <= cturn-1;
    refresh <= refreshing && !halt && (count[5:1]==6);			// 2 clocks for NDRAM
`elsif SDRAM1
    cdlx <= (count[4:0]==12 || count[4:0]==13);
    if (cset) cturn <= prw? 4:4; else if (ctrn) cturn <= cturn-1;
    refresh <= refreshing && !halt && (count[4:0]==15);			// 1 clock for SDRAM
`elsif SDRAM2
    cdlx <= (count[4:0]==12 || count[4:0]==13);
    if (cset) cturn <= prw? 4:4; else if (ctrn) cturn <= cturn-1;
    refresh <= refreshing && !halt && (count[4:0]==15);			// 1 clock for SDRAM
`elsif SDRAM3
    cdlx <= (count[4:0]==12 || count[4:0]==13);
    if (cset) cturn <= prw? srcfg[3:0]:srcfg[7:4]; else if (ctrn) cturn <= cturn-1;	// @ 4:5
    refresh <= refreshing && !halt && (count[6:0]=={L,srcfg[13:8]});	// 1 clock for SDRAM @ 0x3F
`elsif SDRAM4
    cdlx <= L;
    if (cset) cturn <= prw? srcfg[3:0]:srcfg[7:4]; else if (ctrn) cturn <= cturn-1;	// @ 4:5
    refresh <= refreshing && !halt && (count[6:0]=={L,srcfg[13:8]});	// 1 clock for SDRAM @ 0x3F
`endif
    // refreshing is held from czero until a cset at which srcfg is all-zero or count[6:5] matches srcfg[15:14]
    refreshing <= czero || (refreshing && !(cset && (srcfgz || (count[6:5]==srcfg[15:14]))));	// needs N 32 clock cycles for recovery @ 3
  end

  // request queue logic
  // agod/irdd are one-clock delayed copies of ago/istr (ioclk).
  // acl0 -> aclp -> aclq -> acl is the aclk-side issue pipeline (see below).
  reg agod,acl,aclp,aclpd,aclq,afull,irdd,qrw;
  wire arw  = acmd[35];		// request r/w flag: 1 selects a read-channel slot
  wire iago = agod &&  arw;	// delayed request strobe, arw=1
  wire oago = agod && !arw;	// delayed request strobe, arw=0
  wire iacl = acl  &&  prw;	// command issued with prw=1
  wire oacl = acl  && !prw;	// command issued with prw=0
  wire ird  = istr;

  // current ptr for each W/R slot is stored in distributed ram
  // there are 8 write slots (banks) followed by 8 read slots (channels)
  // counter nomenclature is Queue, Process, Read
  // DDR2 has 4|8 banks - will try to put adjacent packets in 4-bank case
  // NDRAM can write back-to-back so fake a linear bank sequence
  wire pstat,qstat,vstat,wstat;
  reg  [6:0] oqcnt;				// running count of arw=0 requests queued (cleared during rstx)
  reg  [3:0] qadr,padr,radr,padrd,radrd,padrl;	// slot addresses: queue, process, read, and delayed/latched copies
  reg  [3:0] qdiff,rdiff;			// pointer differences: queued-processed, processed-read
  wire [3:0] qsloto,psloto,rsloto;
  wire [3:0] qslotro,pslotro,rslotro; 
  reg  [2:0] rpsel,psel;			// read channel candidate / selected read channel
  wire [7:0] rir   = {qselm[15:9],L};	// ram input route
  wire [2:0] qsel  = qselm[8:6];
  wire [2:0] wsel  = qselm[5:3];
  wire [2:0] rsel  = qselm[2:0];
  wire [2:0] pbnk  = count[CBW-1:CBW-3];	// bank implied by the top 3 bits of the CBW-bit cycle count
  wire [2:0] wbnk  = NDR? oqcnt[2:0] : acmd[2+BAB:BAB];	// bank of the incoming request
  wire [2:0] qbnk  = NDR? pbnk       : qcmd[2+BAB:BAB];	
  wire [3:0] qadrx = {arw,arw?qsel:wbnk};	// slot being queued: 8+qsel when arw=1, else wbnk
  wire [3:0] padrx = {prw,prw?psel:pbnk};	// slot being processed: 8+psel when prw=1, else pbnk
  wire [3:0] radrx = {H,rsel};			// read slot 8+rsel
  wire [3:0] wadrx = {L,wsel};			// write slot wsel (not referenced elsewhere in this module)

  // next pointer values; forced to zero while rstx so the reset sweep clears the cRAMs
  wire [3:0] qslotp1 = rstx? L4:qsloto+1;
  wire [3:0] pslotp1 = rstx? L4:psloto+1;
  wire [3:0] rslotp1 = rstx? L4:rsloto+1;
  wire [3:0] pslotpa = psloto+acl;		// process pointer including a command being issued this clock

  // per-slot pointer RAMs.  Port order is not visible here; from usage it is
  // presumably (dout, din, addr, wclk, we, raddr2, dout2[, rclk2]) - confirm against cram16xNd/cram16xNda.
  cram16xNda #(4) qslot (qsloto,qslotp1,qadr, ioclk,agod|rstx, padr, qslotro,aclk);
  cram16xNd  #(4) rslot (rsloto,rslotp1,radr, ioclk,irdd|rstx, radrx,rslotro);
  cram16xNda #(4) pslot (psloto,pslotp1,padr,  aclk,acl |rstx, radr, pslotro,ioclk);

  always @(posedge ioclk) begin
    // qadr follows the request slot on ago; during rstx it sweeps rstc[5:2] over all 16 slots
    if (ago)      qadr <= qadrx; else if (rstx) qadr <= rstc[5:2];
    // radr points at the slot being read around istr, otherwise at the last read slot processed (padrl)
    if (ird|irdd) radr <= radrx; else           radr <= padrl;
    if (rstx)     oqcnt <= 0;    else if (oago) oqcnt <= oqcnt+1;
    agod  <= ago;
    irdd  <= ird;
    rdiff <= (pslotro-rsloto);
    radrd <= radr;
    afull <= wstat;
  end
  reg opass;
  // acl0 marks the start of each 2^(4-WIDE)-clock command slot, suppressed while refreshing
  wire acl0 = (count[3-WIDE:0]==0) && !refreshing;
  wire aclr = aclp;
  always @(posedge aclk) begin
    aclp  <= acl0; 
    aclpd <= aclp;
    // aclq: the slot has something queued (pstat) one clock after aclp
    if (!aclr) aclq <= 0; else aclq <= pstat;
    // acl: issue the command; NDR gates on vstat, otherwise the queued command's bank must match pbnk
    if (!aclq) acl  <= 0; else acl  <= NDR? vstat : (qbnk==pbnk);
    if (acl0|aclq) padr <= padrx;	// channel being processed
    else           padr <= qadr;	// channel just queued
    // remember the slot of the last prw=1 command (also refreshed during reset)
    if (iacl|rsty) padrl <= padr;
    qdiff <= (qslotro-pslotpa);
    padrd <= padr;
    qrw   <= prw;
  end

  // calculate the channel full/empty status flags
  // qdiff/rdiff are compared against thresholds and latched into the bit of
  // each flag vector addressed by padrd (aclk side) or radrd (ioclk side).
  reg [15:0] qrdy,prdy,prdz,rrdy,rrdz,brdy,srdy;
  reg [7:0]  trdy,vrdy;
  wire [3:0] qmax = NDR? 7:11;
  wire [3:0] cmax = NDR? 6:7;
  wire prdyi = (qdiff!=0);	// process avail
  wire prdzi = (qdiff>1);	// process avail 2+
  wire qrdyi = (qdiff>qmax);	// process queue full
  wire brdyi = (qdiff>cmax);	// process block ready
  wire rrdyi = (rdiff!=0);	// read cache avail any
  wire rrdzi = (rdiff>1);	// read cache avail 2+
  wire srdyi = (rdiff>cmax);	// read cache full
  wire [15:0] urdy; delaypipe #(RACKUDLY,1,16) trdys (ioclk,rrdy,urdy); // wait for data to reach the cache
  generate
  for (i=0; i<16; i=i+1) begin:rdys
    wire padri = (padrd==i);	// slot i was addressed on the aclk side last clock
    wire radri = (radrd==i);	// slot i was addressed on the ioclk side last clock
    always @(posedge aclk)  if (rsty) prdy[i] <= L; else if (padri) prdy[i] <= prdyi;
    always @(posedge aclk)  if (rsty) prdz[i] <= L; else if (padri) prdz[i] <= prdzi;
    always @(posedge aclk)  if (rsty) qrdy[i] <= L; else if (padri) qrdy[i] <= qrdyi;
    always @(posedge aclk)  if (rsty) brdy[i] <= L; else if (padri) brdy[i] <= brdyi;
    always @(posedge ioclk) if (rstx) rrdy[i] <= L; else if (radri) rrdy[i] <= rrdyi;
    always @(posedge ioclk) if (rstx) rrdz[i] <= L; else if (radri) rrdz[i] <= rrdzi;
    always @(posedge ioclk) if (rstx) srdy[i] <= L; else if (radri) srdy[i] <= srdyi;
  end
  for (i=0; i<8; i=i+1) begin:rdyt
    // rseli: channel i was the one selected by rsel at the last istr
    reg rseli; always @(posedge ioclk) if (istr) rseli <= (rsel==i);
    // wait for data to reach the cache on the way up but show empty immediately
    // trdy needs the delayed rrdy (urdy); it uses the stricter "2+" flag while
    // more is queued for the channel or the channel is currently being read
    always @(posedge ioclk) trdy[i] <= urdy[i+8] && ( ((prdy[i+8])||(rseli&iena))? rrdz[i+8] : rrdy[i+8] );
//    always @(posedge ioclk) trdy[i] <=  urdy[i+8] && ( ((DDR&rir[i])||(prdy[i+8])||(rseli&iena))? rrdz[i+8] : rrdy[i+8] );
    // vrdy: read channel i may be scheduled - cache not full and a request
    // (or a full block, for DDR channels flagged in rir) is queued
    always @(posedge aclk)  vrdy[i] <= !srdy[i+8] && ( (DDR&rir[i])? brdy[i+8] : prdy[i+8] );
  end
  endgenerate
  assign vstat =  vrdy[psel] || !prw;
  assign pstat =  prdy[padrx]; 
  assign qstat =  prdz[padrx]; 		// not referenced elsewhere in this module
  assign qrdym = {prdy[15:8],qrdy[15:8],trdy}; 	// {read queue avail, read queue full, read cache avail}
  assign wstat = (qrdy[7:0]!=0);		// write queue full

  // write coalesce logic
  // opass/opcnt are updated when padrd addresses slot 7 or 15; opcnt counts
  // such visits while opass is low and forces opass when it reaches 32.
  // opass is not consumed anywhere else in this module.
  reg [5:0] opcnt;
  wire padr7 = (padrd[2:0]==7);
  always @(posedge aclk) begin
    if (padr7) opass <= (prdy[7]) || (prdz[7:0]!=0) || opcnt[5];
    if (opass) opcnt <= 0; else if (padr7) opcnt <= opcnt+1;
  end

  // read queue channel select logic
  // rpsel walks the read channels looking for one with vrdy set (channel 0
  // wins whenever it is ready).  At rprep (cturn==1, the last turnaround clock)
  // or rpreq the candidate is copied to psel, and at rprep prw is chosen for
  // the next cycle.
  reg rprep,rpreq;
  wire bump = prw? rprep|rpreq : !vrdy[rpsel];
  wire rforce;
  always @(posedge aclk) begin
    rprep <= (cturn==1);
    rpreq <= NDR && !vstat && (count[2:0]==6);
    if (vrdy[0]) rpsel <= 0; else if (bump) rpsel <= rpsel+1;
    if (rprep|rpreq) psel <= rpsel;
    if (rprep) prw <= !count[CBW] && (rforce || vrdy[rpsel]);	// only lets read cycles on every other - no back to back
  end

  // calibration cycles for DDR4
  // rforce is tied low below, so calcy is always 0 after the first clock and
  // calcnt has no effect; calup still pulses on cset while prw is set.
  reg calcy,calup;
  reg [3:0] calcnt;
  always @(posedge aclk) begin
    calcy <= (pbnk==0) && aclpd && prw && rforce;
    calup <= prw && cset;
    if (calup) calcnt <= 0; else if (cset) calcnt <= calcnt+1;
  end
  assign rforce = L; //calcnt[2];

  // queue ram block
  // The request word plus its cache slot (aslot) is stored at {qadr,qsloto}
  // and read back at {padr,psloto}; the stored slot comes back as bslot.
  // dpramb port order is not visible here - presumably port A on ioclk is the
  // write side and port B on aclk the read side; confirm against dpramb.
  // qcnt/pcnt are 8-bit concatenations zero-extended into 9-bit wires.
  wire [31:0] dcmd;
  wire [3:0] dinfo;
  wire [6:0] aslot = arw? {qadr[2:0],qsloto}:oqcnt[6:0];
  wire [6:0] bslot;
  wire [8:0] qcnt = {qadr,qsloto}; 
  wire [8:0] pcnt = {padr,psloto}; 
`ifdef MEM_DBG
  // debug build: bypass the queue and emit a fixed command pattern derived from count
  reg        tcl;  always @(posedge aclk) tcl  <= (count[1:0]==2);				// acl every 4 clocks
  reg [35:0] tcmd; always @(posedge aclk) tcmd <= {count[5],L, 2'h2, 22'h0,count[4:2],7'h0};	// 8 reads, 8 writes
  assign qcl = tcl;
  assign qcmd = tcmd;
`elsif JVM
  // JVM build: acmd[33:6] is queued; qcmd[5:0] carries the calibration flags
  dpramb #(2,32,32) qram (ioclk,H, qcnt,agod,		{acmd[35],acmd[33:6],aslot[6:4]},aslot[3:0], dcmd,dinfo,
                          aclk,aclp,  pcnt,L,	L32,L4,	{qcmd[35],qcmd[33:6],bslot[6:4]},bslot[3:0]);
  assign qcmd[5:0]={4'h0,calup,calcy};
  assign qcl = acl;
  assign qcmd[34] = halt;	// high for configuration section
`else
  // default build: acmd[33:31] and acmd[27:3] are queued; qcmd[30:28] and qcmd[2:0] are driven to zero
  dpramb #(2,32,32) qram (ioclk,H, qcnt,agod,		{acmd[35],acmd[33:31],acmd[27:3],aslot[6:4]},aslot[3:0], dcmd,dinfo,
                          aclk,aclp,  pcnt,L,	L32,L4,	{qcmd[35],qcmd[33:31],qcmd[27:3],bslot[6:4]},bslot[3:0]);
  assign qcl = acl;
  assign qcmd[2:0]=0;
  assign qcmd[30:28]=0;
  assign qcmd[34] = halt;	// high for configuration section
`endif

  // cache block offsets
  // rnext/wnext: bslot delayed through delaypipes parameterized by RACKDLY / WACKDLY-1
  // inext: the 8-bit {radrx,rslotro} truncated to 7 bits, i.e. {rsel,rslotro}
  // onext: the write request counter
  wire [6:0] rnext,wnext;
  delaypipe #(RACKDLY-0,1,7) rns (aclk,bslot,rnext); 
  delaypipe #(WACKDLY-1,1,7) wns (aclk,bslot,wnext); 
  reg [6:0] inext,onext;
  always @(posedge ioclk) begin
    inext <= {radrx,rslotro};
    onext <= oqcnt;
  end

  // DRAM read/write counters
  // The low MLB bits count within a block; when they are all ones (or the
  // burst is idle) the upper bits are reloaded from rnext/wnext and the low
  // bits restart at 0.  In the NDR case rnext[3] is dropped from the read block address.
  // NOTE(review): rset qualifies on the registered renl while wset uses the raw
  // wack input - presumably intentional (see WACKDLY-1 above); confirm.
  // NOTE(review): with the default WIDE/MBB the upper field is 6 bits, so the
  // 7-bit rnext/wnext lose their MSB on assignment - confirm instantiation parameters.
  wire rset = !renl  || (rcntl[MLB-1:0]=={MLB{H}});
  wire wset = !wack  || (wcntl[MLB-1:0]=={MLB{H}});
  always @(posedge rclk) begin
    if (rset) rcntl[MBB-1:MLB] <= NDR? {rnext[6:4],rnext[2:0]} : rnext;
    if (rset) rcntl[MLB-1:0]   <= 0; else rcntl[MLB-1:0] <= rcntl[MLB-1:0]+1;
    renl <= rack;
  end
  // falling-edge copies of the read counter/enable
  always @(negedge rclk) begin
    rcntu <= rcntl;
    renu <= renl;
  end
  always @(posedge wclk) begin
    if (wset) wcntl[MBB-1:MLB] <= wnext;
    if (wset) wcntl[MLB-1:0]   <= 0; else wcntl[MLB-1:0] <= wcntl[MLB-1:0]+1;
    wenl <= wack;
  end
  // falling-edge copies of the write counter/enable
  always @(negedge wclk) begin
    wcntu <= wcntl;
    wenu <= wenl;
  end

  // IO side counters (1 clock delay on ienax to compensate for shorter crossbar route)
  // One clock after istr/ostr the upper address bits are loaded from
  // inext/onext and the low ILB bits restart at 0; otherwise the low bits
  // free-run.  In the NDR case inext[3] is dropped from the input block address.
  // NOTE(review): with the default WIDE/IBB the upper field is 6 bits, so the
  // 7-bit inext/onext lose their MSB on assignment - confirm instantiation parameters.
  reg iset,oset;
  always @(posedge iclk) begin
    iset  <= istr;
    ienax <= iena;
    if (iset) iadrx[IBB-1:ILB] <= NDR? {inext[6:4],inext[2:0]} : inext;
    if (iset) iadrx[ILB-1:0]   <= 0; else iadrx[ILB-1:0] <= iadrx[ILB-1:0]+1;
  end
  always @(posedge oclk) begin
    oset  <= ostr;
    oenax <= oena;
    if (oset) oadrx[IBB-1:ILB] <= onext;
    if (oset) oadrx[ILB-1:0]   <= 0; else oadrx[ILB-1:0] <= oadrx[ILB-1:0]+1;
  end

  // debug bus: alternative selections are kept commented out for quick swapping
  //assign test = qrdy[7:0];
  //assign test = {aclp,refreshing};
  //assign test = {aclp,refreshing,oena,iena,wack,rack,afull,prw};
  assign test = {aclp,refreshing,oena,wack, iena,rack,iacl,iago};

endmodule

// calibrate - data-pattern checker for interface calibration.
//
// While ena is high an 8-bit counter (tcnt) divides time into 256-clock
// windows.  Every clock the pattern checks below are registered; each clock
// on which any check fails (and ena was high the clock before) increments
// errs.  errs is cleared near the start of a window and its value is latched
// into terrs at the end of the window.
//
// Pattern expected on dat, as implied by the checks:
//   - in each 16-bit lane the upper byte is the bitwise complement of the lower byte
//   - in the low 32 bits the two nibbles of every byte are equal
//
// Parameters:
//   BW   width of dat.  The checks index bits up to dat[63], so BW must be >= 64.
//   BWH  BW/2; no longer used internally, retained so existing parameter
//        overrides on instantiations keep working.
//
// Ports:
//   clk    clock
//   ena    enables the window counter and error counting
//   dat    data word under test
//   calib  error count of the last completed window (terrs)
//   ctest  {terrs==0, tcnt[7], ena, err}
module calibrate (clk,ena,dat, calib,ctest);
  parameter BW=64;
  parameter BWH=BW/2;
  input clk,ena;
  input [BW-1:0] dat;
  output [7:0] calib;
  output [3:0] ctest;

  // calibration check
  reg enad,estr,eend;
  reg [7:0] tcnt,errs,terrs,checks;

  // tcnt has no reset; without a defined start value it stays X in 4-state
  // simulation and the window strobes estr/eend never assert.  Zero matches
  // the usual FPGA register power-up state.
  initial tcnt = 0;

  // a failing check only counts if ena was high when the data was sampled
  wire err = enad && (checks!=0);
  always @(posedge clk) begin
    enad <= ena;
    if (ena) tcnt <= tcnt+1;
    // byte-complement checks, one per 16-bit lane
    checks[0] <= dat[15:8]  != ~dat[ 7:0];
    checks[1] <= dat[31:24] != ~dat[23:16];
    checks[2] <= dat[47:40] != ~dat[39:32];
    checks[3] <= dat[63:56] != ~dat[55:48];
    // nibble-equality checks on the low four bytes
    checks[4] <= dat[7:4]   !=  dat[3:0];
    checks[5] <= dat[15:12] !=  dat[11:8];
    checks[6] <= dat[23:20] !=  dat[19:16];
    checks[7] <= dat[31:28] !=  dat[27:24];
    // window strobes: estr clears the running count, eend latches it
    estr <= ena && (tcnt==1);
    eend <= ena && (tcnt==255);
    if (estr) errs <= 0; else if (err) errs <= errs+1;
    if (eend) terrs <= errs;
  end
  assign calib = terrs;
  assign ctest = {(terrs==0),tcnt[7],ena,err};

endmodule

