/**********************************************
 ICE Proprietary Software - do NOT disseminate
 **********************************************/
/*
  Various Shift Registers

  Jeff Schoen
  Innovative Computer Engineering, Inc.
  1/15/2003

*/
`include "mdefs.h"

// Empty placeholder module: no ports, no parameters, no logic.
module registers;

endmodule

// Per-target feature switches, chosen by a device-family macro that must be
// defined before this point (presumably by the build or mdefs.h -- TODO confirm).
//   HASLUT6 : selects the SRLC32E / MUXF9 variants in srlMxN and buffer
//   NOSRL   : selects the register-based variants in srlMxN and bootreset
//   HASDSP  : defined here but not referenced in the visible part of this file
// When no family macro is defined, HASDSP and HASLUT6 are both enabled.
`ifdef VIRTEX2

`elsif SPARTAN3

`elsif VIRTEX4
 `define HASDSP

`elsif SPARTAN6
 `define HASLUT6

`elsif ALTERA
  `define NOSRL
  
`else
 `define HASDSP
 `define HASLUT6
`endif

// Registered write-strobe decoder for one address bank, built as three
// register stages:
//   stage 1: scs     <= addr[OTOP-1:OBNK]==BANK, forced to 0 while cs is low
//   stage 2: swr     <= wr, forced to 0 while scs is low
//   stage 3: swrb[k] <= addr[OBNK-1:2]==k, forced to 0 while swr is low
// Each stage samples its inputs on its own clock edge, so addr must still be
// valid when stage 3 decodes it.
module swrbank #(
  parameter BANK=0,
  parameter N=16,
  parameter OBNK=6,	// 2**N bank size in bytes, 6 = 64 byte default bank size
  parameter OTOP=20	// 2**N address size
) (
  input          clk,
  input          cs,
  input          wr,
  input  [31:0]  addr,
  output [N-1:0] swrb
);

  // bank select hit
  reg scs;
  always @(posedge clk) begin
    if (!cs) scs <= 0;
    else     scs <= (addr[OTOP-1:OBNK]==BANK);
  end

  // localized reg for timing
  reg swr;
  always @(posedge clk) begin
    if (!scs) swr <= 0;
    else      swr <= wr;
  end

  // one strobe bit per index value of addr[OBNK-1:2]
  reg [N-1:0] sreg;
  integer k;
  always @(posedge clk) begin
    for (k=0; k<N; k=k+1) begin
      if (!swr) sreg[k] <= 0;
      else      sreg[k] <= (addr[OBNK-1:2]==k);
    end
  end

  assign swrb = sreg;
endmodule

// Single-stage write-strobe decoder: on each clock swrb[k] is set when wr is
// high and addr[5:0] equals k, and cleared when wr is low.
// The BANK parameter is declared but not used by this module.
module swrbanks #(
  parameter BANK=0,
  parameter N=16
) (
  input          clk,
  input          wr,
  input  [31:0]  addr,
  output [N-1:0] swrb
);

  reg [N-1:0] sreg;
  integer k;
  always @(posedge clk) begin
    for (k=0; k<N; k=k+1) begin
      if (!wr) sreg[k] <= 0;
      else     sreg[k] <= (addr[5:0]==k);
    end
  end

  assign swrb = sreg;
endmodule

// Efficiently translate a sparse register write from one clock domain to another.
//
// vclk domain: when vena is high, vdata is captured into wdata and vphase toggles.
// wclk domain: wena pulses for one wclk cycle whenever vphase differs from
//              wphase; wphase then toggles so the phases match again.
//
// wdata is registered on vclk only and is driven out unchanged, so writes must
// be sparse enough that wdata is stable when wena is seen.
// NOTE(review): vphase is sampled directly by a single wclk register (wena),
// with no separate synchronizer stage -- confirm this is acceptable for the
// clock relationship in use.
//
// The phase and strobe registers are given explicit initial values so that
// the toggle handshake starts from a known state instead of X in simulation.
module resyncif (vclk,vena,vdata, wclk,wena,wdata);
  parameter BW=8;
  input vclk,vena,wclk;
  output reg wena;
  input [BW-1:0] vdata;
  output reg [BW-1:0] wdata;

  reg vphase=0, wphase=0;
  initial wena = 0;

  // source domain: capture data and flip the request phase
  always @(posedge vclk) begin
    if (vena) wdata <= vdata;
    if (vena) vphase <= !vphase;
  end

  // destination domain: one-cycle strobe per phase change, then acknowledge
  always @(posedge wclk) begin
    wena <= (vphase^wphase) && !wena;
    if (wena) wphase <= !wphase;
  end
endmodule

// Rebuilds a copy of the clock `in` from register outputs:
// r1 toggles on every rising edge of in, r2 copies r1 on every falling edge,
// so out = r1 ^ r2 is high after a rising edge and low after a falling edge.
//
// Both registers are initialized to 0; without a known starting value r1
// would toggle X forever in simulation and out would never resolve.
module mkclk (out,in);
  input in;
  output out;
  reg r1=0, r2=0;
  always @(posedge in) r1 <= !r1;
  always @(negedge in) r2 <= r1;
  assign out = r1 ^ r2;
endmodule

// Single-bit buffer: out follows in.
// Under verilator, or when HASLUT6 is not defined, this is a plain assign.
// With HASLUT6 the signal is passed through a MUXF9 primitive whose select
// input is in and whose data inputs are tied to constant 0 and 1.
// NOTE(review): MUXF9 availability on every HASLUT6 target is not visible
// here -- confirm for the SPARTAN6 case.
module buffer (out,in);
  input in;
  output out;
`ifdef verilator
  assign out = in;
`elsif HASLUT6
//  LUT1 #(.INIT(2'h2)) lut (.I0(in),.O(out));
  MUXF9 mux (.I0(1'b0),.I1(1'b1),.S(in),.O(out));
`else
  assign out = in;
`endif
endmodule

// N-bit register with selectable depth:
//   PIPE==0 : out = in (no register)
//   PIPE==1 : out = in delayed one clock
//   other   : out = in delayed two clocks
// Both register stages exist regardless of PIPE.
module ff #(
  parameter N=1,
  parameter PIPE=1
) (
  output [N-1:0] out,
  input  [N-1:0] in,
  input          clk
);
  reg [N-1:0] inq,inr;

  always @(posedge clk) begin
    inq <= in;
    inr <= inq;
  end

  generate
  if (PIPE==0) begin:sel0
    assign out = in;
  end
  else if (PIPE==1) begin:sel1
    assign out = inq;
  end
  else begin:sel2
    assign out = inr;
  end
  endgenerate
endmodule

// N-bit register with clock enable: out takes in on a rising clk edge when en
// is high, and holds otherwise.
module ffe #(
  parameter N=1
) (
  output reg [N-1:0] out,
  input      [N-1:0] in,
  input              clk,
  input              en
);
  always @(posedge clk) begin
    if (en) begin
      out <= in;
    end
  end
endmodule

// N-bit register with synchronous reset and selectable pipeline depth:
//   PIPE==0 : out = in (the reset register is bypassed)
//   PIPE==1 : out = the reset register
//   PIPE>1  : the reset register feeds a delaypipe of PIPE-1 taps
module ffr #(
  parameter N=1,
  parameter PIPE=1
) (
  output [N-1:0] out,
  input  [N-1:0] in,
  input          clk,
  input          rst
);
  // first stage, cleared synchronously by rst
  reg [N-1:0] inq;
  always @(posedge clk) begin
    if (rst) inq <= 0;
    else     inq <= in;
  end

  generate
  if (PIPE==0) begin:p0
    assign out = in;
  end
  else if (PIPE==1) begin:p1
    assign out = inq;
  end
  else if (PIPE>1) begin:pN
    delaypipe #(.TAPS(PIPE-1),.PIPE(1),.N(N)) dp (.clk(clk),.in(inq),.out(out));
  end
  endgenerate
endmodule

// Replicates a single-bit input onto N outputs, one BUF primitive per output
// bit.
module fanout #(
  parameter N=1
) (
  output [N-1:0] out,
  input          in
);
  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    BUF db (out[g], in);
    //synthesis attribute KEEP of db is "TRUE"
  end
  endgenerate
endmodule

// Registered fanout: replicates a single-bit input onto N outputs, one FD
// primitive per output bit, all clocked by clk.
module fanoutp #(
  parameter N=1
) (
  output [N-1:0] out,
  input          in,
  input          clk
);
  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    FD db (.Q(out[g]), .D(in), .C(clk));
    //synthesis attribute KEEP of db is "TRUE"
  end
  endgenerate
endmodule

// Power-up reset generator: rst is the registered inverse of rstn, and rstn
// goes high once the start-up delay has elapsed.
//
//   NOSRL defined : a free-running counter stops when it reaches DELAX
//                   (DELAY rounded down to an even value). The counter is M
//                   bits wide (at most 8 with the default M), so DELAY values
//                   of 256 or more never match and rst would never release.
//   otherwise     : a constant 1 is shifted through a delaypipe of DELAY taps.
//
// The counter is given an explicit initial value of 0. Without it the
// counter, rstn and rst all stay X in a 4-state simulation, because
// X+1 is X and (X == DELAX) is X.
module bootreset (clk, rst);
  parameter DELAY=1;
  parameter M = (DELAY<16)? 4 :  (DELAY<64)? 6 : 8;
  parameter DELAX=((DELAY&1)==1)? DELAY-1 : DELAY;
  input clk;
  output reg rst;
  wire rstn;
`ifdef NOSRL
  reg [M-1:0] cnt = 0;
  assign rstn = (cnt == DELAX);
  always @(posedge clk) if (!rstn) cnt <= cnt+1;
`else
  delaypipe #(DELAY) dp (clk,1'b1,rstn);
`endif
  always @(posedge clk) rst <= !rstn;
endmodule

// Delays an N-bit bus by a parameterized amount.
//   TAPS<=0 : out = in (combinational)
//   TAPS==1 : out = in registered once (inr)
//   else    : out = output of an srlMxN addressed with TAPS-PIPE-1, with PIPE
//             passed through to select its registered output
// The srlMxN instance and the inr register are instantiated for every TAPS
// value; only the final assign selects between them.
// M is the srlMxN address width and tops out at 7 bits.
module delaypipe (clk, in, out);
  parameter TAPS=1;
  parameter PIPE=1;
  parameter N=1;
  localparam M=(TAPS>64)?7:(TAPS>32)?6:(TAPS>16)?5:4;
  input clk;
  input [N-1:0] in;
  output [N-1:0] out /* synthesis syn_keep=1 */;

  wire [N-1:0] ins;
  // constant shift-register address, truncated to M bits
  wire [M-1:0] delay = (TAPS-PIPE-1);
  srlMxN #(M,N,PIPE) inst (clk,1'b1,delay,in,ins);
  reg [N-1:0] inr; always @(posedge clk) inr <= in;
  assign out = (TAPS<=0)? in : (TAPS==1)? inr : ins;
endmodule

// Delays an N-bit bus, instantiating only the logic needed for the chosen
// TAPS value:
//   TAPS==0 : wire
//   TAPS==1 : one register
//   TAPS==2 : two registers
//   TAPS>2  : srlMxN addressed with TAPS-PIPE-1, using its registered output
module delayp #(
  parameter TAPS=1,
  parameter N=1
) (
  input          clk,
  input  [N-1:0] in,
  output [N-1:0] out
);
  localparam PIPE=1;
  localparam M=(TAPS>64)?7:(TAPS>32)?6:(TAPS>16)?5:4;

  reg [N-1:0] inq;

  generate
  if (TAPS==0) begin:d0
    assign out = in;
  end
  else if (TAPS==1) begin:d1
    always @(posedge clk) begin
      inq <= in;
    end
    assign out = inq;
  end
  else if (TAPS==2) begin:d2
    reg [N-1:0] inr;
    always @(posedge clk) begin
      inr <= in;
      inq <= inr;
    end
    assign out = inq;
  end
  else if (TAPS>2) begin:dN
    // constant shift-register address, truncated to M bits
    wire [M-1:0] delay = (TAPS-PIPE-1);
    srlMxN #(.M(M),.N(N),.PIPE(PIPE)) inst (.clk(clk),.ena(1'b1),.a(delay),.d(in),.qp(out));
  end
  endgenerate

endmodule

// Single-bit delay with a run-time delay setting: a 16-deep srlMxN is
// addressed with dly-2 (truncated to 4 bits) and its registered output is used.
module delaypX (
  input        clk,
  input  [3:0] dly,
  input        in,
  output       out
);
  wire [3:0] addr = dly-2;
  srlMxN #(.M(4),.N(1),.PIPE(1)) inst (.clk(clk),.ena(1'b1),.a(addr),.d(in),.qp(out));
endmodule

// Array form of delayp, compiled only when XXVIVADO is defined: applies the
// same TAPS-clock delay to each of the M N-bit elements of in.
// The unpacked-array ports require a tool that accepts SystemVerilog syntax.
`ifdef XXVIVADO
module delaypA (clk, in, out);
  parameter TAPS=1;
  parameter N=1;
  parameter M=1;
  input clk;
  input [N-1:0] in [M-1:0];
  output [N-1:0] out [M-1:0] /* synthesis syn_keep=1 */;
  genvar i;
  generate
  // one delayp instance per array element
  for (i=0; i<M; i=i+1) begin
    delayp #(TAPS,N) dpi (clk,in[i],out[i]);
  end
  endgenerate
endmodule
`endif

// One delay element: in is passed to out through a BUF primitive. The
// syn_keep and KEEP annotations are attached to the output and the instance.
module delayline (out, in);
  input in;
  output out /* synthesis syn_keep=1 */;
  BUF db (out, in);
  //synthesis attribute KEEP of db is "TRUE"
endmodule

// One delay element built from a LUT1 primitive with INIT 2'b10.
module delaylineu (
  output out,
  input  in
);
  LUT1 #(2'b10) db (out, in);
endmodule

// Selectable delay element: out = in when sel is high, otherwise out = din.
// The syn_keep and KEEP annotations are attached to the output net.
module sdelayline (out, in,din, sel);
  output out /* synthesis syn_keep=1 */;
  input in,din,sel;
  assign out = sel? in : din;
  //synthesis attribute KEEP of out is "TRUE"
endmodule

// Chain of TAPS delaylineu elements between in and out. taps[0] is the input
// and taps[TAPS] is the output; with TAPS==0 the output is the input.
module delayN #(
  parameter TAPS=1
) (
  output out,
  input  in
);
  wire [TAPS:0] taps;
  assign taps[0] = in;
  assign out     = taps[TAPS];

  genvar g;
  generate
  for (g=0; g<TAPS; g=g+1) begin:blk
    delaylineu inst (taps[g+1], taps[g]);
  end
  endgenerate
endmodule

// Tapped combinational delay chain of TAPS sdelayline stages.
// Stage i drives delay[i] with in when taps[i] is high, otherwise with
// delay[i+1]; the far end delay[TAPS] is tied to 0 and out is delay[0].
// Under verilator the chain is replaced by a plain wire.
module tapdelay #(
  parameter TAPS=8
) (
  output            out,
  input             in,
  input  [TAPS-1:0] taps
);
`ifdef verilator
  assign out = in;
`else
  wire [TAPS:0] delay;
  assign delay[TAPS] = 0;
  assign out = delay[0];

  genvar g;
  generate
  for (g=0; g<TAPS; g=g+1) begin:blk
    sdelayline inst (delay[g], in, delay[g+1], taps[g]);
  end
  endgenerate
`endif
endmodule

// Registered 4-bit compare built from one LUT4 primitive. The LUT INIT value
// BITS defaults to (1<<VALUE); the LUT output is registered on clk.
module veq4p #(
  parameter VALUE=1,
  parameter BITS=(1<<VALUE)
) (
  output reg   out,
  input  [3:0] in,
  input        clk
);
  wire hit;
  LUT4 #(BITS) db (hit, in[0],in[1],in[2],in[3]);

  always @(posedge clk) begin
    out <= hit;
  end
endmodule

// Two-of-three majority vote, driven out through a BUF primitive.
module vote3 (
  output out,
  input  in0,
  input  in1,
  input  in2
);
  wire majority = (in0 && in1) || (in1 && in2) || (in2 && in0);
  BUF vb ( out, majority );
endmodule

// N-bit wide bank of SRL16E primitives sharing one clock, one enable and one
// 4-bit address; bit i of d feeds the primitive that drives bit i of q.
module srl16xN #(
  parameter N=1
) (
  input          clk,
  input          ena,
  input  [3:0]   a,
  input  [N-1:0] d,
  output [N-1:0] q
);
  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    SRL16E inst (q[g], a[0], a[1], a[2], a[3], ena, clk, d[g]);
  end
  endgenerate

endmodule

// N-bit wide bank of SRL16E primitives with a 2:1 input select: each
// primitive is fed d1[i] when sel is high and d0[i] otherwise.
module srl16xNx2 #(
  parameter N=1
) (
  input          clk,
  input          ena,
  input  [3:0]   a,
  input  [N-1:0] d0,
  input  [N-1:0] d1,
  input          sel,
  output [N-1:0] q
);
  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    SRL16E inst (q[g], a[0], a[1], a[2], a[3], ena, clk, sel?d1[g]:d0[g]);
  end
  endgenerate

endmodule

// N-bit wide bank of SRL16E primitives whose outputs are registered once
// more on clk before leaving the module (the output register has no enable).
module srl16xNp #(
  parameter N=1
) (
  input              clk,
  input              ena,
  input      [3:0]   a,
  input      [N-1:0] d,
  output reg [N-1:0] q
);
  // raw primitive outputs
  wire [N-1:0] r;

  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    SRL16E inst (r[g], a[0], a[1], a[2], a[3], ena, clk, d[g]);
  end
  endgenerate

  always @(posedge clk) q <= r;

endmodule

// N-bit wide bank of SRL16E primitives exposing both the raw primitive
// outputs (r) and a copy registered once more on clk (q).
module srl16xNrp #(
  parameter N=1
) (
  input              clk,
  input              ena,
  input      [3:0]   a,
  input      [N-1:0] d,
  output     [N-1:0] r,
  output reg [N-1:0] q
);
  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    SRL16E inst (r[g], a[0], a[1], a[2], a[3], ena, clk, d[g]);
  end
  endgenerate

  always @(posedge clk) q <= r;

endmodule

// Addressable shift register, N bits wide with an M-bit address.
//   clk, ena : shift clock and shift enable
//   a        : tap address
//   d        : data shifted in
//   qp       : addressed tap; the registered copy (p) when PIPE>0, otherwise
//              the combinational tap (q)
// Implementation is chosen at compile time:
//   NOSRL    : array of 2**M registers with a registered read for p
//   HASLUT6  : SRLC32E chains for M==5, M==6 and M>=7 (4 x 32 deep)
//   otherwise: SRLC16E chains for M==5, M==6 and M>=7 (8 x 16 deep)
//   M<=4     : a single SRL16E per bit on every primitive-based target
// The M>=7 branches only decode address bits up to a[6].
module srlMxN (clk, ena, a, d, qp);
  parameter M=4;
  parameter N=1;
  parameter PIPE=0;
  input clk,ena;
  input [M-1:0] a;
  input [N-1:0] d;
  output [N-1:0] qp;

  // q = combinational tap, p = tap registered one extra clock
  wire [N-1:0] q,p;
`ifdef NOSRL
  // register-based shift chain: stage 0 takes d, stage i takes stage i-1
  reg [N-1:0] alt_srl [2**M-1:0];
  genvar i;
  generate
  for (i=0; i<2**M; i=i+1) begin: alt_srl_depth
    always@(posedge clk) begin
      if (ena) alt_srl[i] <= (i==0)? d : alt_srl[i-1];
    end
  end
  endgenerate
  assign q = alt_srl[a];
  reg [N-1:0] ap;
  always @(posedge clk) ap <= q;
  assign p = ap;
`else
  // cN = cascade output of primitive N, qN = addressed output of primitive N
  wire [N-1:0] c0,c1,c2,c3,c4,c5,c6,c7;
  wire [N-1:0] q0,q1,q2,q3,q4,q5,q6,q7;
  genvar i;
  generate
  for (i=0; i<N; i=i+1) begin:blk
`ifdef HASLUT6
   // 32-deep primitives; the upper address bits select among cascaded stages
   if (M>=7) begin:x7
    SRLC32E inst0 (.Q(q0[i]), .A(a), .CE(ena), .CLK(clk), .D( d[i]), .Q31(c0[i]));
    SRLC32E inst1 (.Q(q1[i]), .A(a), .CE(ena), .CLK(clk), .D(c0[i]), .Q31(c1[i]));
    SRLC32E inst2 (.Q(q2[i]), .A(a), .CE(ena), .CLK(clk), .D(c1[i]), .Q31(c2[i]));
    SRLC32E inst3 (.Q(q3[i]), .A(a), .CE(ena), .CLK(clk), .D(c2[i]), .Q31(c3[i]));
    mux4xN  inst4 (q[i], q0[i],q1[i],q2[i],q3[i], a[6:5]);
    mux4xNp inst5 (p[i], q0[i],q1[i],q2[i],q3[i], a[6:5], clk);
   end
   if (M==6) begin:x6
    SRLC32E inst0 (.Q(q0[i]), .A(a), .CE(ena), .CLK(clk), .D(d[i]),  .Q31(c0[i]));
    SRLC32E inst1 (.Q(q1[i]), .A(a), .CE(ena), .CLK(clk), .D(c0[i]), .Q31(c1[i]));
    mux2xN  inst2 (q[i], q0[i],q1[i], a[5]);
    mux2xNp inst3 (p[i], q0[i],q1[i], a[5], clk);
   end
   if (M==5) begin:x5
    SRLC32E inst0 (.Q(q[i]), .A(a), .CE(ena), .CLK(clk), .D(d[i]), .Q31(c0[i]));
    reg r; always @(posedge clk) r<=q[i]; assign p[i]=r;
   end
`else
   // 16-deep primitives; the upper address bits select among cascaded stages
   if (M>=7) begin:x7
    SRLC16E inst0 (q0[i], c0[i], a[0], a[1], a[2], a[3], ena, clk, d[i]);
    SRLC16E inst1 (q1[i], c1[i], a[0], a[1], a[2], a[3], ena, clk, c0[i]);
    SRLC16E inst2 (q2[i], c2[i], a[0], a[1], a[2], a[3], ena, clk, c1[i]);
    SRLC16E inst3 (q3[i], c3[i], a[0], a[1], a[2], a[3], ena, clk, c2[i]);
    SRLC16E inst4 (q4[i], c4[i], a[0], a[1], a[2], a[3], ena, clk, c3[i]);
    SRLC16E inst5 (q5[i], c5[i], a[0], a[1], a[2], a[3], ena, clk, c4[i]);
    SRLC16E inst6 (q6[i], c6[i], a[0], a[1], a[2], a[3], ena, clk, c5[i]);
    SRLC16E inst7 (q7[i], c7[i], a[0], a[1], a[2], a[3], ena, clk, c6[i]);
    mux8xN  inst8 (q[i], q0[i],q1[i],q2[i],q3[i],q4[i],q5[i],q6[i],q7[i], a[6:4]);
    mux8xNp inst9 (p[i], q0[i],q1[i],q2[i],q3[i],q4[i],q5[i],q6[i],q7[i], a[6:4], clk);
   end
   if (M==6) begin:x6
    SRLC16E inst0 (q0[i], c0[i], a[0], a[1], a[2], a[3], ena, clk, d[i]);
    SRLC16E inst1 (q1[i], c1[i], a[0], a[1], a[2], a[3], ena, clk, c0[i]);
    SRLC16E inst2 (q2[i], c2[i], a[0], a[1], a[2], a[3], ena, clk, c1[i]);
    SRLC16E inst3 (q3[i], c3[i], a[0], a[1], a[2], a[3], ena, clk, c2[i]);
    mux4xN  inst4 (q[i], q0[i],q1[i],q2[i],q3[i], a[5:4]);
    mux4xNp inst5 (p[i], q0[i],q1[i],q2[i],q3[i], a[5:4], clk);
   end
   if (M==5) begin:x5
    SRLC16E inst0 (q0[i], c0[i], a[0], a[1], a[2], a[3], ena, clk, d[i]);
    SRLC16E inst1 (q1[i], c1[i], a[0], a[1], a[2], a[3], ena, clk, c0[i]);
    mux2xN  inst2 (q[i], q0[i],q1[i], a[4]);
    mux2xNp inst3 (p[i], q0[i],q1[i], a[4], clk);
   end
`endif
   // small depths use one 16-deep primitive plus a local output register
   if (M<=4) begin:x4
    SRL16E inst0 (q[i], a[0], a[1], a[2], a[3], ena, clk, d[i]);
    reg r; always @(posedge clk) r<=q[i]; assign p[i]=r;
   end
  end
  endgenerate
`endif
  // PIPE is a parameter, so this selects one path at elaboration
  assign qp = (PIPE>0)? p:q;

endmodule


// Small queue built on SRL16E primitives.
// a is the address applied to the primitives: it resets to 15, increments on
// a write without a read, decrements on a read without a write, and holds
// otherwise. stat is high whenever a is not 15. Data is shifted in when wr
// is high.
module que16xN #(
  parameter N=1
) (
  input          clk,
  input          rst,
  input          wr,
  input          rd,
  input  [N-1:0] d,
  output [N-1:0] q,
  output         stat
);
  reg [3:0] a;
  always @(posedge clk) begin
    if (rst) a <= 15;
    else case ({wr,rd})
      2'b10:   a <= a+1;
      2'b01:   a <= a-1;
      default: ;
    endcase
  end
  assign stat = (a!=15);

  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    SRL16E inst (q[g], a[0], a[1], a[2], a[3], wr, clk, d[g]);
  end
  endgenerate

endmodule

// Small queue built on SRL16E primitives, with an almost-full flag.
// a resets to 15, increments on a write without a read and decrements on a
// read without a write. stat is high whenever a is not 15; afull is high
// while a is above NAF and below 15.
module que16xNaf #(
  parameter N=1,
  parameter NAF=11
) (
  input          clk,
  input          rst,
  input          wr,
  input          rd,
  input  [N-1:0] d,
  output [N-1:0] q,
  output         stat,
  output         afull
);
  reg [3:0] a;
  always @(posedge clk) begin
    if (rst) a <= 15;
    else case ({wr,rd})
      2'b10:   a <= a+1;
      2'b01:   a <= a-1;
      default: ;
    endcase
  end
  assign stat  = (a!=15);
  assign afull = (a>NAF && a<15);

  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    SRL16E inst (q[g], a[0], a[1], a[2], a[3], wr, clk, d[g]);
  end
  endgenerate

endmodule

// Small queue built on SRL16E primitives with a registered data output.
// a resets to 15, increments on a write without a read and decrements on a
// read without a write. stat is high whenever a is not 15 and is not
// delayed; q is the primitive output registered once on clk.
module que16xNp #(
  parameter N=1
) (
  input              clk,
  input              rst,
  input              wr,
  input              rd,
  input      [N-1:0] d,
  output reg [N-1:0] q,
  output             stat
);
  reg [3:0] a;
  always @(posedge clk) begin
    if (rst) a <= 15;
    else case ({wr,rd})
      2'b10:   a <= a+1;
      2'b01:   a <= a-1;
      default: ;
    endcase
  end
  assign stat = (a!=15);

  // raw primitive outputs
  wire [N-1:0] r;
  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:blk
    SRL16E inst (r[g], a[0], a[1], a[2], a[3], wr, clk, d[g]);
  end
  endgenerate

  always @(posedge clk) q <= r;

endmodule

// Queue with an M-bit address, built on srlMxN.
// a resets to all ones, increments on a write without a read and decrements
// on a read without a write. aneb is high whenever a is not all ones.
// anebp is aneb registered, forced to 0 on any clock where rd is high.
// stat is anebp when PIPE==1 and aneb otherwise.
module queMxN #(
  parameter M=4,
  parameter N=1,
  parameter PIPE=0
) (
  input          clk,
  input          rst,
  input          wr,
  input          rd,
  input  [N-1:0] d,
  output [N-1:0] q,
  output         stat
);
  reg  [M-1:0] a;
  wire [M-1:0] b = {M{1'b1}};
  wire aneb = (a!=b);
  reg  anebp;

  always @(posedge clk) begin
    if (rst) a <= b;
    else case ({wr,rd})
      2'b10:   a <= a+1;
      2'b01:   a <= a-1;
      default: ;
    endcase
  end

  always @(posedge clk) begin
    if (rd) anebp <= 0;
    else    anebp <= aneb;
  end

  assign stat = (PIPE==1)? anebp : aneb;

  srlMxN #(.M(M),.N(N),.PIPE(PIPE)) inst (.clk(clk),.ena(wr),.a(a),.d(d),.qp(q));

endmodule

// Queue with an M-bit address, built on srlMxN, with a registered
// almost-full flag.
// a resets to all ones, increments on a write without a read and decrements
// on a read without a write. aneb is high whenever a is not all ones.
// afull registers (a > AFULL-2) && aneb. stat is the registered, rd-cleared
// copy of aneb when PIPE==1 and aneb itself otherwise.
module queMxNaf #(
  parameter M=4,
  parameter N=1,
  parameter PIPE=0,
  parameter AFULL=0
) (
  input          clk,
  input          rst,
  input          wr,
  input          rd,
  input  [N-1:0] d,
  output [N-1:0] q,
  output         stat,
  output reg     afull
);
  localparam AFC=AFULL-2;

  reg  [M-1:0] a;
  wire [M-1:0] b = {M{1'b1}};
  wire aneb = (a!=b);
  reg  anebp;

  always @(posedge clk) begin
    if (rst) a <= b;
    else case ({wr,rd})
      2'b10:   a <= a+1;
      2'b01:   a <= a-1;
      default: ;
    endcase
  end

  always @(posedge clk) begin
    if (rd) anebp <= 0;
    else    anebp <= aneb;
  end

  always @(posedge clk) begin
    afull <= (a>AFC) && aneb;
  end

  assign stat = (PIPE==1)? anebp : aneb;

  srlMxN #(.M(M),.N(N),.PIPE(PIPE)) inst (.clk(clk),.ena(wr),.a(a),.d(d),.qp(q));

endmodule

// Queue with an M-bit address, built on srlMxN, with registered almost-full
// and almost-empty flags and no stat output.
// a resets to all ones, increments on a write without a read and decrements
// on a read without a write. aeqb is high when a is all ones.
//   afull  registers (a > AFULL-2) && !aeqb
//   aempty registers (a < AEMPTY)  ||  aeqb
module queMxNafae #(
  parameter M=4,
  parameter N=1,
  parameter AFULL=0,
  parameter AEMPTY=0
) (
  input          clk,
  input          rst,
  input          wr,
  input          rd,
  input  [N-1:0] d,
  output [N-1:0] q,
  output reg     afull,
  output reg     aempty
);
  localparam AFC=AFULL-2;

  reg  [M-1:0] a;
  wire [M-1:0] b = {M{1'b1}};
  wire aeqb = (a==b);

  always @(posedge clk) begin
    if (rst) a <= b;
    else case ({wr,rd})
      2'b10:   a <= a+1;
      2'b01:   a <= a-1;
      default: ;
    endcase
  end

  always @(posedge clk) begin
    afull  <= (a>AFC) && !aeqb;
    aempty <= (a<AEMPTY) || aeqb;
  end

  srlMxN #(.M(M),.N(N)) inst (.clk(clk),.ena(wr),.a(a),.d(d),.qp(q));

endmodule

// Dual-clock 36-bit queue backed by a block RAM.
//   wclk side: wa is the write address, asynchronously cleared by rst and
//              incremented on each wr.
//   rclk side: ra is the read address, asynchronously cleared by rst and
//              incremented on each rd.
//   stat     : high when the write address, as seen through two rclk
//              registers (wb, wc), differs from ra.
//   prot     : when high, the RAM write port enable is deasserted.
// NOTE(review): wa is copied into the rclk domain as a plain binary count
// through wb/wc; whether a multi-bit change can be sampled mid-transition
// depends on the clock relationship -- confirm.
module queMx36a (wclk,rclk, rst,prot, wr,rd, d,q, stat);
  parameter M=4;
  localparam N=36;
  input wclk,rclk,rst,prot,wr,rd;
  input [N-1:0] d;
  output [N-1:0] q;
  output stat;

  wire H=1, L=0;
  reg [M-1:0] wa,ra,wb,wc;
  // write pointer, wclk domain
  always @(posedge wclk or posedge rst) begin
    if (rst) wa <= 0; else if (wr) wa <= wa+1;
  end
  // read pointer, rclk domain
  always @(posedge rclk or posedge rst) begin
    if (rst) ra <= 0; else if (rd) ra <= ra+1;
  end
  // write pointer re-registered twice on rclk
  always @(posedge rclk) begin
    wb <= wa; 
    wc <= wb;
  end
  assign stat = (wc!=ra);
  // pointers resized to the 9-bit RAM address ports
  wire [8:0] uwa = wa;
  wire [8:0] ura = ra;
`ifdef ALTERA
  ramNxN #(36,512,9) ram (wclk,~prot,wr,uwa,d,,
	                  rclk,H,L,ura,36'd0,q);
`else      	
  RAMB16_S36_S36 ram (
  .ADDRA(uwa), .DIA(d[31:0]), .DIPA(d[35:32]), .CLKA(wclk), .ENA(~prot), .WEA(wr), .SSRA(L), .DOA(), .DOPA(),
  .ADDRB(ura), .DIB(32'd0), .DIPB(4'd0), .CLKB(rclk), .ENB(H), .WEB(L), .SSRB(L), .DOB(q[31:0]), .DOPB(q[35:32])
  );
`endif

endmodule

// Dual-clock N-bit queue backed by the dpramb module (defined outside this
// view).
//   wclk side: rst is registered into wrst, which synchronously clears the
//              write address wa; wa increments on each wr.
//   rclk side: rst is registered into rrst, which synchronously clears the
//              read address ra; ra advances to rap (= ra+rd) on each rd.
//   stat     : high when the write address, as seen through two rclk
//              registers (wb, wc), differs from ra.
//   zeros    : high when both wc and ra are 0.
// The RAM read port is addressed with rap, i.e. the next read address while
// rd is high.
// The prot input is declared but not used in this module.
// NOTE(review): wa crosses into the rclk domain as a plain binary count via
// wb/wc -- confirm this is acceptable for the clock relationship in use.
module queMxNa (wclk,rclk, rst,prot, wr,rd, d,q, stat,zeros);
  parameter M=4;
  parameter N=8;
  input wclk,rclk,rst,prot,wr,rd;
  input [N-1:0] d;
  output [N-1:0] q;
  output stat,zeros;

  wire H=1, L=0;
  reg wrst,rrst;
  reg [M-1:0] wa,ra,wb,wc;
  // write pointer, wclk domain
  always @(posedge wclk) begin
    wrst <= rst;
    if (wrst) wa <= 0; else if (wr) wa <= wa+1;
  end
  // look-ahead read address
  wire [M-1:0] rap = ra+rd;
  // read pointer, rclk domain
  always @(posedge rclk) begin
    rrst <= rst;
    if (rrst) ra <= 0; else if (rd) ra <= rap;
  end
  // write pointer re-registered twice on rclk
  always @(posedge rclk) begin
    wb <= wa; 
    wc <= wb;
  end
  assign stat = (wc!=ra);
  assign zeros = (wc==0) && (ra==0);
  // unused dpramb outputs
  wire x1,x2,x3;

  dpramb #(2,N,N) ram (wclk,H, wa,wr, d,L, x1,x2,
  		       rclk,H, rap,L, L,L, q,x3);

endmodule

/* 

    Growth
    Most sig bit
    Data
    Rounders

    GG MDDDDDDD RRRR

*/

// Round and clip a BWI-bit value in place.
//
// Bit layout of the input (MSB first): BWC growth bits, BWO data bits (whose
// top bit is the sign position after clipping), BWD = BWI-BWO-BWC rounding
// bits.
//
//   dinr    : input value
//   dout    : registered result, same width as the input
//   round   : enables round-half-to-even on the BWD low bits
//   clip    : enables saturation when the growth bits disagree with the sign
//   clipped : registered flag, high when the growth-bit clip condition hit
//   PIPE==2 : adds an input register ahead of the rounding logic
//
// A value whose data field already equals the maximum positive code is also
// forced to the saturated pattern (maxout), so the rounding increment cannot
// wrap it.
//
// BWD==1 is handled separately: with a single fraction bit there are no bits
// below the half bit, so the original din[BWD-2:0] select would be out of
// range. In that case the increment reduces to "half bit set and LSB odd".
module roundN2Mc (dinr, dout, round, clip, clk, clipped);

  parameter BWI=20;     // input/output width
  parameter BWO=16;     // roundTo width
  parameter BWC=2;      // upper clip width
  parameter PIPE=1;

  localparam BWD=BWI-BWO-BWC; // width difference - bottom of output
  localparam BWCU=1+BWC;      // upper clip range
  localparam BWCL=BWO-1;      // lower clip range

  input round, clip, clk;
  input [BWI-1:0] dinr;
  output [BWI-1:0] dout;
  output clipped;

  reg [BWI-1:0] dout;
  reg clipped;
  wire inc;

  // register the input
  wire  [BWI-1:0] din;
  reg   [BWI-1:0] dinq;
  always @(posedge clk) dinq <= dinr;
  assign din = (PIPE==2)? dinq : dinr;

  // clip if out of range
  wire sign = din[BWI-1];
  wire msb  = din[BWI-1-BWC];
  wire [BWC:0] msbs = din[BWI-1:BWI-1-BWC];
  wire [BWC:0] ones = {(BWC+1){1'b1}};
  wire [BWO-1:0] maxval = {1'b0,{(BWO-1){1'b1}}};

  wire maxout = (din[BWI-BWC-1:BWD] == maxval);

  wire clipping = (BWC>0) && clip && (msbs!=0) && (msbs!=ones);

  wire clipit = (clipping || maxout);

  // increment if fraction is >=0.5 unless 0.5 and even
  generate
  if (BWD>0) begin:A
    if (BWD>1) begin:rhe
      assign inc = round && din[BWD-1] && !(din[BWD-2:0]==0 && !din[BWD]);
    end
    else begin:rh1
      // only the half bit exists: a tie always, so round up only when odd
      assign inc = round && din[BWD-1] && din[BWD];
    end
    always @(posedge clk) begin
      if (clipit) dout[BWD-1:0] <= {BWD{!sign}};
      else	  dout[BWD-1:0] <= din[BWD-1:0];
    end
  end
  else begin:B
    assign inc = 0;
  end
  endgenerate

  always @(posedge clk) begin
    if (clipit) dout[BWI-1:BWD] <= {{BWCU{sign}},{BWCL{!sign}}};
    else        dout[BWI-1:BWD] <= din[BWI-1:BWD] + inc;
    clipped <= clipping;
//if (BWO==16) $write("Din=%x inc=%x clip=%x dout=%x  maxval=%x dinD=%x\n",din,inc,clipit,dout,maxval,din[BWI-BWC-1:BWD]);
  end

endmodule

// Wrapper around roundN2Mc that returns only the BWO-bit data field.
// MODE bit 0 enables rounding and MODE bit 1 enables clipping; the clipped
// flag of the inner module is not exported.
module roundN2M #(
  parameter BWI=20,     // input width
  parameter BWO=16,     // output width
  parameter BWC=0,	// upper clip width
  parameter PIPE=1,
  parameter MODE=1
) (
  input  [BWI-1:0] din,
  output [BWO-1:0] dout,
  input            clk
);
  wire rnd  = ((MODE&1)!=0);
  wire clip = ((MODE&2)!=0);

  wire clipped;
  wire [BWI-1:0] dtmp;

  roundN2Mc #(.BWI(BWI),.BWO(BWO),.BWC(BWC),.PIPE(PIPE)) inst (
    .dinr(din), .dout(dtmp), .round(rnd), .clip(clip), .clk(clk), .clipped(clipped)
  );

  // drop the growth bits above and the rounding bits below the data field
  assign dout = dtmp[BWI-BWC-1:BWI-BWC-BWO];
endmodule



// Bit permutation with optional inversion: output bit i is input bit
// (i XOR (F-1)), XORed with inv. With the default F=N and N a power of two
// this reverses the bit order.
module bitrevN #(
  parameter N=8,
  parameter F=N
) (
  input  [N-1:0] bi,
  output [N-1:0] bo,
  input          inv
);
  localparam M=F-1;

  genvar g;
  generate
  for (g=0; g<N; g=g+1) begin:swp
    assign bo[g] = inv ^ bi[g^M];
  end
  endgenerate
endmodule

// Combinational signed multiply returning the top WC bits of the product.
//   a, b : signed operands of WA and WB bits
//   c    : signed result, WC bits (default WA+WB-1)
// Under verilator the product is computed with the * operator at WP bits.
// Otherwise the operands are left-justified (or truncated to their top 18
// bits) into a MULT18X18 primitive, and the result is taken from just below
// the top bit of its 36-bit product.
module smul (a, b, c);

  parameter WA=16;
  parameter WB=WA;
  parameter WC=WA+WB-1;

  localparam WP=WA+WB-1;

  input signed [WA-1:0] a;
  input signed [WB-1:0] b;
  output signed [WC-1:0] c;

`ifdef verilator
  wire signed [WP-1:0] cx;
  assign cx = a * b;
  assign c = $signed(cx[WP-1:WP-WC]);
`else
  // pad narrow operands with low zeros, or keep the top 18 bits of wide ones
  wire [17:0] ax = (WA<18)? {a,{(18-WA){1'd0}}} : a[WA-1:WA-18];
  wire [17:0] bx = (WB<18)? {b,{(18-WB){1'd0}}} : b[WB-1:WB-18];
  wire [35:0] cx;
  MULT18X18 mul (.P(cx),.A(ax),.B(bx));
  // bit 35 is skipped; the result starts at bit 34
  assign c = $signed(cx[35-1:35-WC]);
`endif

endmodule


// Registered signed multiply with optional round/clip of the result.
//   a, b : signed operands of WA and WB bits
//   c    : signed result, WC bits
//   clk  : clock
//   ena  : clock enable for the multiplier registers
//   PIPE : 1 or 2; PIPE==2 adds an operand register stage ahead of the product
//   WO   : bit offset of the result within the product; the value 99 means
//          "use WP-WC", i.e. take the top WC bits
//   RNC  : 0 = plain bit-select of the product; >0 = pass the product through
//          roundN2M with RNC as its MODE (adds the roundN2M register)
// Implementation is chosen at compile time:
//   VIRTEX2   : MULT18X18S primitive with left-justified operands; WO and RNC
//               are not used in this branch
//   USEDSP48E : DSP48E primitive
//   otherwise : inferred registered multiply
module srmul (a, b, c, clk,ena);

  parameter WA=16;
  parameter WB=WA;
  parameter WC=WA+WB-1;
  parameter PIPE=1;
  parameter WO=99;	// output left shift offset from bottom
  parameter RNC=0;

  localparam WP=WA+WB-1;
  localparam WOFF= (WO!=99)? WO : WP-WC;

  input signed [WA-1:0] a;
  input signed [WB-1:0] b;
  output signed [WC-1:0] c;
  input clk,ena;

`ifdef VIRTEX2
  generate
  // pad narrow operands with low zeros, or keep the top 18 bits of wide ones
  wire [17:0] ax = (WA<18)? {a,{(18-WA){1'd0}}} : a[WA-1:WA-18];
  wire [17:0] bx = (WB<18)? {b,{(18-WB){1'd0}}} : b[WB-1:WB-18];
  wire signed [35:0] cx;
  if (PIPE==2) begin:p2
    reg [17:0] az; always @(posedge clk) if (ena) az <= ax;
    reg [17:0] bz; always @(posedge clk) if (ena) bz <= bx;
    MULT18X18S mul (.P(cx),.A(az),.B(bz),.C(clk),.CE(ena),.R(1'b0));
  end
  if (PIPE==1) begin:p1
    MULT18X18S mul (.P(cx),.A(ax),.B(bx),.C(clk),.CE(ena),.R(1'b0));
  end
  endgenerate
  assign c = $signed(cx[35-1:35-WC]);
`else
`ifdef USEDSP48E
  wire L=0, H=1;
  wire signed [WP-1:0] cx;
  DSP48E dsp (.CLK(clk),.OPMODE(8'h35),.ALUMODE(4'h0), .A(a),.B(b),.C(L),.P(cx),
              .CEP(ena),.CEC(ena),.CEM(ena),.CEA1(L),.CEA2(L),.CEB1(L),.CEB2(L));
  defparam dsp.AREG=PIPE-1, dsp.ACASCREG=0;
  defparam dsp.BREG=PIPE-1, dsp.BCASCREG=0;
  defparam dsp.CREG=0, dsp.MREG=1, dsp.PREG=0;
  defparam dsp.OPMODEREG=0, dsp.ALUMODEREG=0;
`else
  // inferred multiplier: ax/bx are always registered, but only feed the
  // product when PIPE==2
  reg signed [WA-1:0] ax;
  reg signed [WB-1:0] bx;
  reg signed [WP-1:0] cx;
  always @(posedge clk) begin
    if (ena) ax <= a; 
    if (ena) bx <= b;
    if (ena) cx <= (PIPE==2)? ax * bx : a * b;
  end
`endif
  generate
  if (RNC>0) begin
    // 2 bits upper clip, 3 bits lower round
    roundN2M #(WC+2+3,WC,2,1,RNC) inst (cx[WC-1+WOFF+2:WOFF-3], c, clk);
  end
  if (RNC==0) begin
    assign c = $signed(cx[WC-1+WOFF:WOFF]);
  end
  endgenerate
`endif

endmodule

// Registered signed adder: c takes a+b on a rising clk edge when ena is high
// and holds otherwise. The default result width WA+1 leaves one bit of growth.
module sadd #(
  parameter WA=16,
  parameter WB=WA,
  parameter WC=WA+1
) (
  input      signed [WA-1:0] a,
  input      signed [WB-1:0] b,
  output reg signed [WC-1:0] c,
  input                      clk,
  input                      ena
);

  always @(posedge clk) begin
    if (ena) begin
      c <= a + b;
    end
  end

endmodule

// Dual-edge register pair: qp captures dp on the rising edge of clk, qn
// captures dn on the falling edge.
module rddrN #(
  parameter NB=8
) (
  input               clk,
  input      [NB-1:0] dp,
  input      [NB-1:0] dn,
  output reg [NB-1:0] qp,
  output reg [NB-1:0] qn
);
  // rising-edge half
  always @(posedge clk) begin
    qp <= dp;
  end

  // falling-edge half
  always @(negedge clk) begin
    qn <= dn;
  end
endmodule

// Pipeline of NP rddrN stages. Element 0 of the bp/bn arrays is the input,
// element NP is the output; with NP==0 the outputs equal the inputs.
module rddrNp #(
  parameter NB=8,
  parameter NP=1
) (
  input           clk,
  input  [NB-1:0] dp,
  input  [NB-1:0] dn,
  output [NB-1:0] qp,
  output [NB-1:0] qn
);
  wire [NB-1:0] bp[NP:0],bn[NP:0];

  assign bp[0] = dp;
  assign bn[0] = dn;
  assign qp = bp[NP];
  assign qn = bn[NP];

  genvar g;
  generate
  for (g=0; g<NP; g=g+1) begin:blk
    rddrN #(.NB(NB)) inst (.clk(clk), .dp(bp[g]), .dn(bn[g]), .qp(bp[g+1]), .qn(bn[g+1]));
  end
  endgenerate
endmodule

// Local model of an asynchronous-reset synchronizer.
// While the source reset is at its active level (selected by
// RST_ACTIVE_HIGH), both registers are asynchronously forced to the value
// RST_ACTIVE_HIGH. Otherwise src_arst is shifted through the two registers
// on dest_clk.
// The DEST_SYNC_FF parameter is accepted but not used: the chain is always
// two registers long.
module xpm_cdc_async_rst #(
  parameter DEST_SYNC_FF=2,
  parameter RST_ACTIVE_HIGH=0
) (
  input      src_arst,
  output reg dest_arst,
  input      dest_clk
);
  reg dest_arst1;

  // active-high view of the source reset
  wire areset = (RST_ACTIVE_HIGH)? src_arst : ~src_arst;

  always @(posedge dest_clk or posedge areset) begin
    if (areset) begin
      dest_arst1 <= RST_ACTIVE_HIGH;
      dest_arst  <= RST_ACTIVE_HIGH;
    end
    else begin
      dest_arst1 <= src_arst;
      dest_arst  <= dest_arst1;
    end
  end
endmodule

`ifdef XXVIVADO

// Pipelined adder tree summing NT bus inputs, compiled only when XXVIVADO is
// defined (the unpacked-array port requires SystemVerilog syntax).
//   clk  : clock
//   ival : input valid, delayed by NP+1 clocks to form oval
//   ibus : NT input words of BW bits
//   obus : sum of all inputs, BW bits (no growth bits are added)
// The tree is stored heap-style in c[]: level 0 loads the inputs into
// c[NPT..2*NPT-1] (unused leaves are zeroed), each later level n writes
// c[j+i] = c[2j+i] + c[3j+i] with j = NPT>>n, and the root ends up in c[1].
module vsum (clk, ival,ibus, oval,obus);
  parameter BW=32;	// Bus width
  parameter NT=1;	// Number Taps
  input clk,ival;
  input [BW-1:0] ibus [NT-1:0];
  output oval;
  output [BW-1:0] obus;

  // NP = number of adder levels, NPT = leaf count rounded up to a power of two
  localparam NP=(NT>32)?6:(NT>16)?5:(NT>8)?4:(NT>4)?3:(NT>2)?2:(NT>1)?1:0;
  localparam NPT=(1<<NP);
  reg signed [BW-1:0] c[2*NPT-1:0];

  genvar i,n;
  generate
  for (n=0; n<=NP; n=n+1) begin:lvl
    localparam j = NPT>>n;
    localparam k = j*2;
    for (i=0; i<j; i=i+1) begin:node
      if (i>=NT) begin:z
        // beyond the last tap: contributes nothing
        always @(posedge clk) c[j+i] <= 0;
      end
      else if (n==0) begin:ld
        // leaf level: load the input word
        always @(posedge clk) c[j+i] <= ibus[i];
      end
      else begin:sm
        // inner level: add the two children from the level below
        always @(posedge clk) c[j+i] <= c[k+i] + c[k+j+i];
      end
    end
  end
  endgenerate
  assign obus = c[1];
  delayp #(NP+1) dp (clk,ival,oval);
endmodule

`endif
