/**
  Verilog code for implementing core functions

  Jeff Schoen
  Innovative Computer Engineering, Inc.
  11/15/2013
*/

// Unsigned 16-bit, unsigned 18-bit and signed 18-bit vectors used by the table helpers.
typedef logic        [15:0] uintx;	// 16
typedef logic        [17:0] uinty;	// 18
typedef logic signed [17:0] inty;	// 18

// Signed product vectors; the widths are 95, 63, 48 and 42 bits respectively.
typedef logic signed [94:0] dfptxm;
typedef logic signed [62:0] fptxm;
typedef logic signed [47:0] fptxu;
typedef logic signed [41:0] fptxmw;

// Multiplier field limits: M1W=17 (MSB of the 18-bit operand), M2W=26 (MSB of the
// 27-bit operand), M3W=44 (MSB of their product), M2WX=5 (low bits skipped by mulff_fs).
localparam M1W=18-1, M2W=27-1, M3W=M1W+M2W+1, M2WX=31-M2W;

// Build switches tested by the `ifdef blocks below: USEDSP selects the narrow-operand
// multiplier variants, USEDSPY selects the 48x24 path in muldf, and CHECKRANGE is the
// value compared against 0 by the chkfp* range checks.
`define USEDSP 1
`define USEDSPY 1
`define CHECKRANGE 1

// fptw is the return type of the mulff family: 28 bits with USEDSP, 32 bits without.
`ifdef USEDSP
typedef logic signed [27:0] fptw;
`else
typedef logic signed [31:0] fptw;
`endif
// Packed pair of fptw values; y occupies the upper half, x the lower half.
typedef struct packed {fptw y,x;} CxFptw;

// Declaration prefixes for lookup tables (LUTRAM) and persistent variables (STATIC).
// Under verilator both are plain `static`; otherwise LUTRAM also carries the
// keep / rom_style="distributed" attributes.
`ifdef verilator
`define LUTRAM static
`define STATIC static
`else
`define LUTRAM (* keep = "true" *) (* rom_style = "distributed" *) static
`define STATIC static 
`endif

function logic chkfpMF2 (fptx a);		// range check for function arguments: bits [31:18] must be uniform
  logic [31:18] hi, hi_inv;
  hi     = a[31:18];
  hi_inv = ~hi;
  // Flag (1) when the upper field is neither all zeros nor all ones and CHECKRANGE is non-zero.
  chkfpMF2 = (`CHECKRANGE!=0 && hi!=0 && hi_inv!=0);
endfunction
function logic chkfpMF4 (fptx a);		// range check for function arguments: bits [31:19] must be uniform
  logic [31:19] hi, hi_inv;
  hi     = a[31:19];
  hi_inv = ~hi;
  // Same test as chkfpMF2 but with one more low bit allowed below the checked field.
  chkfpMF4 = (`CHECKRANGE!=0 && hi!=0 && hi_inv!=0);
endfunction
function logic chkfpM1 (fptx a);		// range check for the 18-bit multiplier operand (original note: [+-2])
  logic [31:M1W] hi, hi_inv;
  hi     = a[31:M1W];
  hi_inv = ~hi;
  // Flag (1) when bits [31:M1W] are not a uniform extension of bit M1W.
  chkfpM1 = (`CHECKRANGE!=0 && hi!=0 && hi_inv!=0);
endfunction
function logic chkfpM2 (fptx a);		// range check for the 27-bit multiplier operand (original note: [+-1024])
  logic [31:M2W] hi, hi_inv;
  hi     = a[31:M2W];
  hi_inv = ~hi;
  // Flag (1) when bits [31:M2W] are not a uniform extension of bit M2W.
  chkfpM2 = (`CHECKRANGE!=0 && hi!=0 && hi_inv!=0);
endfunction

// mulff: multiply two flt values and discard the low 16 bits of the product.
// With USEDSP only a[M2W:0] (27 bits) and b[M1W:0] (18 bits) enter the multiplier,
// both treated as signed, and a message is printed when chkfpM2(a) or chkfpM1(b)
// reports that an operand does not fit its field. Without USEDSP the full operands
// are multiplied into a fptxm and bits [47:16] are returned.
`ifdef USEDSP
function fptw mulff (fptx a, fptx b);		// multiply flt by flt
  logic[M3W:0] c; c = $signed(a[M2W:0]) * $signed(b[M1W:0]);
  if (chkfpM2(a)) $write("Multiply A OoR A=%x B=%x\n",a,b);
  if (chkfpM1(b)) $write("Multiply B OoR A=%x B=%x\n",a,b);
  // c[M3W:16] is 29 bits wide; the fptw return value keeps the low 28 of them.
  return c[M3W:16];
endfunction
`else
function fptx mulff (fptx a, fptx b);		// multiply flt by flt
  fptxm c; c = a * b;
  return c[47:16];
endfunction
`endif

// mulff_fs: like mulff, but with USEDSP the first operand is taken from its top
// 27 bits, a[31:M2WX], instead of its bottom 27 bits. The result slice is moved
// down by M2WX bits to compensate for the M2WX low bits of a that were dropped.
// NOTE(review): chkfpM2(a) still reports values of a that need more than 27 bits,
// even though this variant reads a[31:M2WX] — confirm the message is intended here.
`ifdef USEDSP
function fptw mulff_fs (fptx a, fptx b);	// multiply flt by flt - full scale 1st 
  logic[M3W:0] c; c = $signed(a[31:M2WX]) * $signed(b[M1W:0]);
  if (chkfpM2(a)) $write("Multiply_fs A OoR A=%x B=%x\n",a,b);
  if (chkfpM1(b)) $write("Multiply_fs B OoR A=%x B=%x\n",a,b);
  return c[M3W-M2WX:16-M2WX];
endfunction
`else
function fptx mulff_fs (fptx a, fptx b);	// multiply flt by flt - full scale 1st 
  fptxm c; c = a * b;
  return c[47:16];
endfunction
`endif

// mulffrnc: multiply two flt values; named "round and clip".
// USEDSP variant: forms the same 27x18 signed product as mulff and computes a
// rounding increment from the discarded bits (half = c[15], exact = c[14:7]==0,
// sign = c[M3W]).
// NOTE(review): the increment is overwritten with 0 immediately after it is printed,
// so the value returned is the truncated c[M3W:16], identical to mulff. The $write
// of every product is unconditional. No clipping step is present in either variant.
// Confirm whether this is leftover debug state.
`ifdef USEDSP
function fptw mulffrnc (fptx a, fptx b);	// multiply flt by flt with round and clip
  logic sign,exact,half,inc;
  logic[M3W:0] c; 
  logic[M3W:16] d;
  c = $signed(a[M2W:0]) * $signed(b[M1W:0]);
  sign = c[M3W];
  // Only bits [14:7] of the discarded fraction are examined; bits [6:0] are ignored.
  exact = c[14:7]==0;
  half = c[15];
//  inc = (half&&exact)? !sign : half;
  inc = half && (exact^sign);
  $write("Multiply round A=%x B=%x C=%x inc=%d \n",a,b,c,inc);
  // Rounding disabled: the increment computed above is discarded here.
  inc = 0;
  d = c[M3W:16] + inc;
  if (chkfpM2(a)) $write("Multiply_rnc A OoR A=%x B=%x\n",a,b);
  if (chkfpM1(b)) $write("Multiply_rnc B OoR A=%x B=%x\n",a,b);
  return d;
endfunction
`else
// Non-DSP variant: plain truncating multiply, same as the non-DSP mulff.
function fptx mulffrnc (fptx a, fptx b);	// multiply flt by flt with round and clip
  fptxm c; c = a * b;
  return c[47:16];
endfunction
`endif

function CxFptx addcc (CxFptx a, CxFptx b);	// component-wise sum of two cxflt values
  CxFptx sum;
  sum.y = a.y + b.y;
  sum.x = a.x + b.x;
  return sum;
endfunction

function CxFptx dupc (CxFptx a);		// return an unchanged copy of a cxflt
  dupc = a;
endfunction

function fptx mag2f (CxFptx a);			// x*x + y*y of a cxflt, each product formed by mulff
  mag2f = mulff(a.x,a.x) + mulff(a.y,a.y);
endfunction

function uint2 qwrap2z (CxFptx a);		// quadrant code (0..3) selecting the rotation that wraps a cxflt
  fptx xmag;
  // One's-complement magnitude of x (~x rather than -x for negative x).
  xmag = (a.x<0)? ~a.x : a.x;
  // 3 when y exceeds |x|, 1 when y is below -|x|, otherwise 2 for negative x and 0 for the rest.
  qwrap2z = (a.y>xmag)? 3 : (a.y<-xmag)? 1 : (a.x<0)? 2 : 0;
endfunction

function CxFptx qrotc (CxFptx a, uint2 quad);	// rotate a cxflt by quad quarter turns
  CxFptx rot;
  // Each step of quad maps (x,y) to (-y,x).
  case (quad)
    0: begin
      rot.x =  a.x;
      rot.y =  a.y;
    end
    1: begin
      rot.x = -a.y;
      rot.y =  a.x;
    end
    2: begin
      rot.x = -a.x;
      rot.y = -a.y;
    end
    3: begin
      rot.x =  a.y;
      rot.y = -a.x;
    end
  endcase
  return rot;
endfunction

function CxFptx mulcc (CxFptx a, CxFptx b);	// complex product a*b built from four mulff calls
  CxFptx prod;
  // real = ax*bx - ay*by
  prod.x = mulff(a.x,b.x) - mulff(a.y,b.y);
  // imag = ay*bx + ax*by
  prod.y = mulff(a.y,b.x) + mulff(a.x,b.y);
  return prod;
endfunction

function CxFptx mulccj (CxFptx a, CxFptx b);	// complex product of a with the conjugate of b
  CxFptx prod;
  // real = ax*bx + ay*by
  prod.x = mulff(a.x,b.x) + mulff(a.y,b.y);
  // imag = ay*bx - ax*by
  prod.y = mulff(a.y,b.x) - mulff(a.x,b.y);
  return prod;
endfunction

function CxFptx mulcf (CxFptx a, fptx b);	// scale both parts of a cxflt by a flt (cxflt is mulff's first operand)
  CxFptx scaled;
  scaled.x = mulff(a.x,b);
  scaled.y = mulff(a.y,b);
  return scaled;
endfunction

function CxFptx mulfc (fptx a, CxFptx b);	// scale both parts of a cxflt by a flt (flt is mulff's first operand)
  CxFptx scaled;
  scaled.x = mulff(a,b.x);
  scaled.y = mulff(a,b.y);
  return scaled;
endfunction

function CxFptx mulfcrnc (fptx a, CxFptx b);	// multiply flt by cxflt using mulffrnc on each part
  CxFptx scaled;
  scaled.x = mulffrnc(a,b.x);
  scaled.y = mulffrnc(a,b.y);
  return scaled;
endfunction

function CxFptx mulcfrnc (CxFptx a, fptx b);	// multiply cxflt by flt using mulffrnc on each part
  CxFptx scaled;
  scaled.x = mulffrnc(a.x,b);
  scaled.y = mulffrnc(a.y,b);
  return scaled;
endfunction

function dptx dptx2d (dptx a);			// identity: returns the dbl argument unchanged
  dptx2d = a;
endfunction
function dptx d2dptx (dptx a);			// identity: returns the dbl argument unchanged
  d2dptx = a;
endfunction

// muldf: multiply a dbl by a flt and drop the low 16 product bits.
// Three build variants:
//   USEDSPX - 27x18 signed product of a[M2W+8:8] and b[M1W:0], sign-extended on return
//             (USEDSPX is not defined in this file);
//   USEDSPY - signed product of a[47:0] and b[23:0], returning bits [79:16]
//             (USEDSPY is defined above, so this is the active path here);
//   default - full-width a*b, returning bits [79:16].
function dptx muldf (dptx a, fptx b);		// multiply dbl by flt
`ifdef USEDSPX
  logic[M3W:0] c; c = $signed(a[M2W+8:8]) * $signed(b[M1W:0]);
  return { {(55-M3W){c[M3W]}},c[M3W:8]};
`elsif USEDSPY
  dfptxm c; c = $signed(a[47:0]) * $signed(b[23:0]);
  return c[79:16];
`else
  dfptxm c; c = a * b;
  return c[79:16];
`endif
endfunction

// mulfd: multiply a flt by a dbl and drop the low 16 product bits.
// The default path multiplies the full operands into a dfptxm and returns bits [79:16].
// NOTE(review): the USEDSPX path (not defined in this file) selects a[M2W+8:8], i.e.
// a[34:8], from the flt operand and only b[M1W:0] from the dbl operand — it appears
// copied from muldf with the operand roles unchanged; verify before enabling USEDSPX.
function dptx mulfd (fptx a, dptx b);		// multiply flt by dbl
`ifdef USEDSPX
  logic[M3W:0] c; c = $signed(a[M2W+8:8]) * $signed(b[M1W:0]);
  return { {(55-M3W){c[M3W]}},c[M3W:8]};
`else
  dfptxm c; c = a * b;
  return c[79:16];
`endif
endfunction

// Shift-register state shared by all phaseditherf calls. It is 26 bits wide so that
// bit 25, one of the taps read by iLRS, is its top bit; the initializer literal is
// sized to match the declaration (it was 25 bits wide, same value).
`STATIC logic [25:0] pd_lrs = 26'h123456;

// phaseditherf: advance the shared LRS state by one step and return its low 4 bits,
// zero-extended into a flt.
function fptx phaseditherf ();
  pd_lrs = iLRS(pd_lrs);	// iLRS returns an int32; only its low 26 bits are kept
  return pd_lrs[3:0];
endfunction

function int16 bswap2 (logic [15:0] a);		// exchange the two bytes of a 16-bit value
  logic [7:0] lo, hi;
  lo = a[7:0];
  hi = a[15:8];
  bswap2 = {lo, hi};
endfunction

function int32 bswap2s (logic [31:0] a);	// exchange the bytes inside each 16-bit half
  logic [15:0] upper, lower;
  upper = {a[23:16], a[31:24]};
  lower = {a[7:0], a[15:8]};
  bswap2s = {upper, lower};
endfunction

function int32 bswap4 (logic [31:0] a);		// reverse the order of the four bytes
  logic [7:0] b0, b1, b2, b3;
  b0 = a[7:0];
  b1 = a[15:8];
  b2 = a[23:16];
  b3 = a[31:24];
  bswap4 = {b0, b1, b2, b3};
endfunction

function int64 lswap (logic [63:0] a);		// exchange the two 32-bit halves
  logic [31:0] lo, hi;
  lo = a[31:0];
  hi = a[63:32];
  lswap = {lo, hi};
endfunction

function uint32 brev4 (logic [31:0] a);		// reverse the order of all 32 bits
  logic [31:0] rev; intx k;
  // Source bit k lands at position 31-k.
  for (k=0; k<32; k++) begin
    rev[31-k] = a[k];
  end
  brev4 = rev;
endfunction

function intx imax (intx a, intx b);		// larger of two ints (b when they are equal)
  imax = (b<a)? a : b;
endfunction

function intx imin (intx a, intx b);		// smaller of two ints (a when they are equal)
  imin = (b<a)? b : a;
endfunction

function fptx fmaxf (fptx a, fptx b);		// larger of two flts (b when they are equal)
  fmaxf = (b<a)? a : b;
endfunction

function fptx fminf (fptx a, fptx b);		// smaller of two flts (a when they are equal)
  fminf = (b<a)? b : a;
endfunction

function dptx fmax (dptx a, dptx b);		// larger of two dbls (b when they are equal)
  fmax = (b<a)? a : b;
endfunction

function dptx fmin (dptx a, dptx b);		// smaller of two dbls (a when they are equal)
  fmin = (b<a)? b : a;
endfunction

function dptx clip (dptx a, dptx amin, dptx amax);	// limit a dbl to [amin,amax]; the lower bound is tested first
  clip = (amin>a)? amin : (amax<a)? amax : a;
endfunction

function fptx clipf (fptx a, fptx amin, fptx amax);	// limit a flt to [amin,amax]; the lower bound is tested first
  clipf = (amin>a)? amin : (amax<a)? amax : a;
endfunction

function intx clipl (intx a, intx amin, intx amax);	// limit an intx to [amin,amax]; the lower bound is tested first
  clipl = (amin>a)? amin : (amax<a)? amax : a;
endfunction

function dptx sclip (dptx a, dptx amax);	// limit a dbl to [~amax,amax]
  dptx amin;
  // The lower bound is the bitwise complement of amax, not its arithmetic negation.
  amin = ~amax;
  sclip = (a<amin)? amin : (a>amax)? amax : a;
endfunction

function fptx sclipf (fptx a, fptx amax);	// limit a flt to [~amax,amax]
  fptx amin;
  // The lower bound is the bitwise complement of amax, not its arithmetic negation.
  amin = ~amax;
  sclipf = (a<amin)? amin : (a>amax)? amax : a;
endfunction

function intx sclipl (intx a, intx amax);	// limit an intx to [~amax,amax]
  intx amin;
  // The lower bound is the bitwise complement of amax, not its arithmetic negation.
  amin = ~amax;
  sclipl = (a<amin)? amin : (a>amax)? amax : a;
endfunction

// divffx: divide flt by flt with the '/' operator.
// The dividend is a followed by 16 zero bits so that the quotient keeps 16 fraction
// bits. It is held in a 48-bit signed temporary: storing {a,16'b0} in a fptx-sized
// variable (as before) dropped the upper bits of a before the division.
// Assumes fptx is a 32-bit signed vector, as the a[31:16] slices elsewhere in this
// file suggest — TODO confirm against the fptx typedef.
// A zero divisor yields an unknown (X) result, as for any '/' expression.
function fptx divffx (fptx a, fptx b);		// divide flt by flt
  logic signed [47:0] au; fptx c;
  au = {a,16'b0};
  c = au / b;		// quotient is truncated to the width of fptx
  return c;
endfunction

function fptx divff (fptx a, fptx b);		// divide flt by flt as a * invf(b)
  fptx recip;
  recip = invf(b);
  // The reciprocal is mulff's first operand, the dividend its second.
  divff = mulff(recip,a);
endfunction

function dptx muldi (dptx a, int32 b);		// multiply dbl by int
  dptx prod;
  // General case: only a[32:0], taken as signed, enters the multiplier.
  prod = $signed(a[32:0]) * b;
  // b = 0, 1, 2 and 4 bypass the multiplier and use the full-width a.
  muldi = (b==0)? 0 : (b==1)? a : (b==2)? a<<1 : (b==4)? a<<2 : prod;
endfunction

function dptx divdi (dptx a, int32 b);		// divide dbl by int with the '/' operator
  divdi = a / b;
endfunction

// mulfi: multiply a flt by an int.
// The general product uses a[M2W-1:0] (26 bits, taken as signed) when USEDSP is
// defined and the full a otherwise; the product is truncated to the width of fptx.
// b = 0, 1, 2 and 4 bypass the multiplier and use the full-width a.
function fptx mulfi (fptx a, intx b);		// multiply flt by int
`ifdef USEDSP
  fptx c; c = $signed(a[M2W-1:0]) * b;
`else
  fptx c; c = a * b;
`endif
  return (b==0)? 0 : (b==1)? a : (b==2)? a<<1 : (b==4)? a<<2 : c;
endfunction

// divfi: divide a flt by an int as a * invfi(b).
// invfi returns i2f(1)/b, i.e. the reciprocal with 16 fraction bits, so the product
// with a must be scaled back by 16 bits. mulff does that (and is what divff uses);
// the previous plain `a * bi` was evaluated at the width of fptx and kept only the
// low bits of the unscaled product.
// b == 0 propagates the unknown result of invfi's division.
function fptx divfi (fptx a, intx b);		// divide flt by int
  fptx bi,c;
  bi = invfi(b);
  c = mulff(a,bi);	// a is the wide (first) operand, the reciprocal the narrow one
  return c;
endfunction

function dptx divp2 (dptx a, intx b);		// shift a dbl right (>>>) by the low 5 bits of b
  logic [4:0] sh;
  sh = b[4:0];
  divp2 = a >>> sh;
endfunction

function dptx mulp2 (dptx a, intx b);		// shift a dbl left by the low 5 bits of b
  logic [4:0] sh;
  sh = b[4:0];
  mulp2 = a << sh;
endfunction

function fptx divfp2 (fptx a, intx b);		// shift a flt right (>>>) by the low 5 bits of b
  logic [4:0] sh;
  sh = b[4:0];
  divfp2 = a >>> sh;
endfunction

function fptx mulfp2 (fptx a, intx b);		// shift a flt left by the low 5 bits of b
  logic [4:0] sh;
  sh = b[4:0];
  mulfp2 = a << sh;
endfunction

function fptx invfi (intx b);			// reciprocal of an int as a flt: i2f(1) / b
  fptx one;
  one = i2f(16'h0001);
  invfi = one / b;
endfunction

function int32 s2i (int16 a);			// sign-extend a short to 32 bits
  logic [15:0] ext;
  ext = {16{a[15]}};
  s2i = {ext, a[15:0]};
endfunction

function int64 s2l (int16 a);			// sign-extend a short to 64 bits
  logic [47:0] ext;
  ext = {48{a[15]}};
  s2l = {ext, a[15:0]};
endfunction

function int64 ii2l (int32 iu, int32 il);	// concatenate upper (iu) and lower (il) ints into a long
  ii2l = {iu, il};
endfunction

function int16 i2s (int32 a);			// keep the low 16 bits of an int
  i2s = a[15:0];
endfunction

function fptx i2f (int16 a);			// int to flt: the int followed by 16 zero fraction bits
  i2f = {a, 16'h0000};
endfunction

function fptx i2fp (int32 a);			// reinterpret an int32 as a flt (plain assignment, no scaling)
  i2fp = a;
endfunction

function int32 fp2i (fptx a);			// reinterpret a flt as an int32 (plain assignment, no scaling)
  fp2i = a;
endfunction

function intx f2i (fptx a);			// flt to intx: bits [31:16], fraction bits dropped
  f2i = a[31:16];
endfunction

function int32 l2i (int64 a);			// keep the low 32 bits of a long
  l2i = a[31:0];
endfunction

function fptx f2x (fptx a);			// flt passed through unchanged (hex view of the same bits)
  f2x = a;
endfunction

function dptx d2x (dptx a);			// dbl passed through unchanged (hex view of the same bits)
  d2x = a;
endfunction

function int32 d2fi (dptx a);			// low 32 bits of a dbl as an int
  d2fi = a[31:0];
endfunction

function int32 d2wi (dptx a);			// high 32 bits of a dbl as an int
  d2wi = a[63:32];
endfunction

function dptx f2d (fptx a);			// flt to dbl: 16 sign bits above, 16 zero bits below
  logic [15:0] ext;
  ext = {16{a[31]}};
  f2d = {ext, a, 16'd0};
endfunction

function dptx i2d (int32 a);			// int to dbl: the int followed by 32 zero bits
  i2d = {a[31:0], 32'h0000_0000};
endfunction

function dptx l2d (int64 a);			// long to dbl: low 32 bits of the long followed by 32 zero bits
  // Bits [63:32] of the long are not used.
  l2d = {a[31:0], 32'h0000_0000};
endfunction

function intx V2s (busx a);			// low 16 bits of a Value buffer as a short
  V2s = a[15:0];
endfunction

function int32 V2i (busx a);			// low 32 bits of a Value buffer as an int
  V2i = a[31:0];
endfunction

function fptx V2f (busx a);			// low 32 bits of a Value buffer as a flt
  V2f = a[31:0];
endfunction

function dptx V2d (busx a);			// low 64 bits of a Value buffer as a dbl
  V2d = a[63:0];
endfunction

function eptx d2e (dptx a, uint2 exp);		// pack a 30-bit window of a dbl together with its 2-bit exponent code
  eptx ext;
  // The window moves up by 8 bits per exponent step; the code is stored in bits [1:0].
  case (exp)
    0: ext = {a[32: 3], exp};
    1: ext = {a[40:11], exp};
    2: ext = {a[48:19], exp};
    3: ext = {a[56:27], exp};
  endcase
  return ext;
endfunction

function dptx e2d (eptx a);			// expand an extended flt back to a dbl
  dptx dbl; uint2 sel;
  sel = a[1:0];
  // a[30:2] is placed 3, 11, 19 or 27 bits up according to the code in a[1:0];
  // a[31] fills every bit above it and zeros fill the bits below.
  case (sel)
    0: dbl = {{32{a[31]}}, a[30:2],  3'd0};
    1: dbl = {{24{a[31]}}, a[30:2], 11'd0};
    2: dbl = {{16{a[31]}}, a[30:2], 19'd0};
    3: dbl = { {8{a[31]}}, a[30:2], 27'd0};
  endcase
  return dbl;
endfunction

function fptx d2f (dptx a);			// dbl to flt: bits [47:16]
  d2f = a[47:16];
endfunction

function intx d2i (dptx a);			// dbl to intx: bits [47:32]
  d2i = a[47:32];
endfunction

function fptx c2f (fptx a);			// constant typed as flt, returned unchanged
  c2f = a;
endfunction

function dptx c2d (dptx a);			// constant typed as dbl, returned unchanged
  c2d = a;
endfunction

function logic [3:0] IN2P (uint16 x);		// top bit of each of the 4 nibbles, most significant nibble first
  IN2P = { x[15], x[11], x[7], x[3] };
endfunction

function logic [7:0] LN2P (uint32 x);		// top bit of each of the 8 nibbles, most significant nibble first
  logic [3:0] hi, lo;
  hi = { x[31], x[27], x[23], x[19] };
  lo = { x[15], x[11], x[7], x[3] };
  LN2P = { hi, lo };
endfunction

function logic [4:0] IO2P (uint16 x);		// top bit of each of the 5 octal (3-bit) digits, most significant first
  IO2P = { x[14], x[11], x[8], x[5], x[2] };
endfunction

function logic [9:0] LO2P (uint32 x);		// top bit of each of the 10 octal (3-bit) digits, most significant first
  logic [4:0] hi, lo;
  hi = { x[29], x[26], x[23], x[20], x[17] };
  lo = { x[14], x[11], x[8], x[5], x[2] };
  LO2P = { hi, lo };
endfunction

function fptx K2F (int32 a);			// int32 returned as a flt by plain assignment
  K2F = a;
endfunction

function int32 F2K (fptx a);			// flt returned as an int32 by plain assignment
  F2K = a;
endfunction

function fptx CI2Fx (int32 a);			// low 16-bit half of a packed cxint to flt
  logic [15:0] ext;
  // Bit 15 fills the upper 16 bits; bits [14:0] are placed one position up.
  ext = {16{a[15]}};
  CI2Fx = { ext, a[14:0], 1'b0 };
endfunction

function fptx CI2Fy (int32 a);			// high 16-bit half of a packed cxint to flt
  logic [15:0] ext;
  // Bit 31 fills the upper 16 bits; bits [30:16] are placed one position up.
  ext = {16{a[31]}};
  CI2Fy = { ext, a[30:16], 1'b0 };
endfunction

function fptx I2F (intx a);			// intx to flt with scale: sign fill, then a[14:0] shifted up one bit
  logic [15:0] ext;
  ext = {16{a[15]}};
  I2F = { ext, a[14:0], 1'b0 };
endfunction

function fptx B2F (int8 a);			// byte to flt with scale: sign fill, then a[6:0] shifted up nine bits
  logic [15:0] ext;
  ext = {16{a[7]}};
  B2F = { ext, a[6:0], 9'b0 };
endfunction

function fptx L2F (int32 a);			// upper 16 bits of an int32 to flt with scale
  logic [15:0] ext;
  // Only a[31:16] is used: a[31] fills the top, a[30:16] is shifted up one bit.
  ext = {16{a[31]}};
  L2F = { ext, a[30:16], 1'b0 };
endfunction

function intx F2I (fptx a);			// flt to intx with scale (inverse of I2F), rounded
  logic [16:0] rnd;
  // Add 1 at bit 0, then drop bit 0; a carry out of bit 16 is lost.
  rnd = a[16:0]+1;
  F2I = rnd[16:1];
endfunction

function bytx F2B (fptx a);			// flt to bytx with scale (inverse of B2F), rounded
  logic [16:8] rnd;
  // Add 1 at bit 8, then drop bit 8; a carry out of bit 16 is lost.
  rnd = a[16:8]+1;
  F2B = rnd[16:9];
endfunction

function uint32 CI2L (intx x, intx y);		// pack two intx into 32 bits: y in the upper half, x in the lower
  CI2L = {y, x};
endfunction

function uint32 CF2L (fptx x, fptx y);		// pack bits [16:1] of two flts: y in the upper half, x in the lower
  logic [15:0] ypart, xpart;
  ypart = y[16:1];
  xpart = x[16:1];
  CF2L = {ypart, xpart};
endfunction

function CxFptx CB2CF (CxBytx a);		// cxbyt to cxflt: B2F applied to each part
  CB2CF = {B2F(a.y), B2F(a.x)};
endfunction

function CxBytx CF2CB (CxFptx a);		// cxflt to cxbyt: F2B applied to each part
  CF2CB = {F2B(a.y), F2B(a.x)};
endfunction

function CxFptx CI2CF (CxIntx a);		// cxint to cxflt: I2F applied to each part
  CI2CF = {I2F(a.y), I2F(a.x)};
endfunction

function CxIntx CF2CI (CxFptx a);		// cxflt to cxint: F2I applied to each part
  CF2CI = {F2I(a.y), F2I(a.x)};
endfunction

function uint64 CF2CIS2 (int4 shf, CxFptx a, CxFptx b);	// two cxflts to four packed ints after a right shift
  logic [3:0] rsh;
  // The shift count is the 4-bit negation of shf; each part is shifted with >> before F2I.
  rsh = -shf;
  // Packing order, most significant first: b.y, b.x, a.y, a.x.
  CF2CIS2 = {F2I(b.y>>rsh),F2I(b.x>>rsh),F2I(a.y>>rsh),F2I(a.x>>rsh)};
endfunction

function CxFptx muxf (fptx x, fptx y);		// combine two flts into a cxflt as {y,x}
  muxf = {y, x};
endfunction
function CxFptx muxs (intx x, intx y);		// combine two intx into a cxflt-typed result as {y,x}
  // NOTE(review): {y,x} is only as wide as two intx while the return type is CxFptx;
  // confirm against the CxFptx/intx typedefs where the two values are meant to land.
  muxs = {y, x};
endfunction

function fptx floorf (fptx a);			// floor of flt: keep bits [31:16], clear the low 16 bits
  floorf = {a[31:16], 16'h0000};
endfunction

function dptx floor (dptx a);			// floor of dbl: keep bits [63:32], clear the low 32 bits
  floor = {a[63:32], 32'h0000_0000};
endfunction

function fptx fracf (fptx a);			// fractional part of flt: low 16 bits under 16 zero bits
  fracf = {16'h0000, a[15:0]};
endfunction

function dptx frac (dptx a);			// fractional part of dbl: low 32 bits under 32 zero bits
  frac = {32'h0000_0000, a[31:0]};
endfunction

function fptx wrapf (fptx a);			// wrapped fraction of flt: low 16 bits sign-extended from bit 15
  logic [15:0] ext;
  ext = {16{a[15]}};
  wrapf = {ext, a[15:0]};
endfunction

function dptx wrap (dptx a);			// wrapped fraction of dbl: low 32 bits sign-extended from bit 31
  logic [31:0] ext;
  ext = {32{a[31]}};
  wrap = {ext, a[31:0]};
endfunction

function int32 iabs (int32 a);			// absolute value of int: negate when bit 31 is set
  iabs = a[31] ? -a : a;
endfunction

function int64 labs (int64 a);			// absolute value of long: negate when bit 63 is set
  labs = a[63] ? -a : a;
endfunction

function fptx fabsf (fptx a);			// absolute value of flt: negate when bit 31 is set
  fabsf = a[31] ? -a : a;
endfunction

function dptx fabs (dptx a);			// absolute value of dbl: negate when bit 63 is set
  fabs = a[63] ? -a : a;
endfunction

function int32 iLRS (int32 a);			// one step of a linear recursive sequence generator
  logic fb;
  // Feedback is the XOR of taps 0, 1, 5 and 25; it is shifted in at bit 0.
  fb = a[0]^a[1]^a[5]^a[25];
  iLRS = {a[30:0], fb};
endfunction

function int32 iLRSP (int32 a, int32 p);	// one LRS step with the taps selected by polynomial mask p
  logic par;
  // Parity of the tapped bits; its inverse is shifted in at bit 0.
  par = ^(a&p);
  iLRSP = {a[30:0], !par};
endfunction

function fptx fdu8 (fptx duda, logic[7:0] da); // interpolation term: (duda * da) >> 8 for an 8-bit da
  logic signed [8:0] step;
  logic [39:0] prod;
  // da is zero-extended by one bit and marked signed, as in the other *du* helpers.
  step = $signed({1'b0,da});
  prod = step*duda;
  fdu8 = prod[39:8];
endfunction

function fptx fdu10 (fptx duda, logic[9:0] da);	// interpolation term: (duda * da) >> 10 for a 10-bit da
  logic signed [10:0] step;
  logic [41:0] prod;
  step = $signed({1'b0,da});
  prod = step*duda;
  fdu10 = prod[41:10];
endfunction

function fptx fdu11 (fptx duda, logic[10:0] da); // interpolation term: (duda * da) >> 11 for an 11-bit da
  logic signed [11:0] step;
  logic [42:0] prod;
  step = $signed({1'b0,da});
  prod = step*duda;
  fdu11 = prod[42:11];
endfunction

function fptx fdu13 (fptx duda, logic[12:0] da); // interpolation term: (duda * da) >> 13 for a 13-bit da
  logic signed [13:0] step;
  logic [44:0] prod;
  step = $signed({1'b0,da});
  prod = step*duda;
  fdu13 = prod[44:13];
endfunction

function intx idu11 (intx duda, logic[10:0] da); // intx interpolation term: (duda * da) >> 11 for an 11-bit da
  logic signed [11:0] step;
  logic [26:0] prod;
  step = $signed({1'b0,da});
  prod = step*duda;
  idu11 = prod[26:11];
endfunction

function intx idu12 (intx duda, logic[11:0] da); // intx interpolation term: (duda * da) >> 12 for a 12-bit da
  logic signed [12:0] step;
  logic [27:0] prod;
  step = $signed({1'b0,da});
  prod = step*duda;
  idu12 = prod[27:12];
endfunction

function intx idu13 (intx duda, logic[12:0] da); // intx interpolation term: (duda * da) >> 13 for a 13-bit da
  logic signed [13:0] step;
  logic [28:0] prod;
  step = $signed({1'b0,da});
  prod = step*duda;
  idu13 = prod[28:13];
endfunction

// invfx table entry 0 (lut[0], lutd[0]): with USEDSP use 16777215, -14680063; otherwise use 2147483647, -2145386495

function fptx invf (fptx a);			// inverse of flt [0,2] with pre-scaling of small inputs
  logic[2:0] nsh; fptx norm; fptx recip;
  // Choose a left shift of 6, 4, 2 or 0 from how many of the top bits of a[16:11] are clear.
  nsh = (a[16:11]==0)? 6 : (a[16:13]==0)? 4 : (a[16:15]==0)? 2 : 0;
  norm = a<<nsh;
  recip = invfx(norm);
  // The table result is shifted left by the same amount to undo the input scaling.
  invf = recip << nsh;
endfunction

// invfx: table-based inverse of a flt, without the input scaling done by invf.
// lut holds 64 base values and lutd the matching first differences; the entry is
// selected by a[16:11] and a[10:0] is the interpolation fraction passed to fdu11.
// A message is printed when chkfpMF2(a) reports the argument out of range.
function fptx invfx (fptx a);			// inverse of flt [0,2]
  `LUTRAM fptx lut[0:63] = '{
    16777215,2097152,1048576,699051,524288,419430,349525,299593,262144,233017,209715,190650,174763,161319,149797,139810,
    131072,123362,116508,110376,104858,99864,95325,91181,87381,83886,80660,77672,74898,72316,69905,67650,
    65536,63550,61681,59919,58254,56680,55188,53773,52429,51150,49932,48771,47663,46603,45590,44620,
    43691,42799,41943,41121,40330,39569,38836,38130,37449,36792,36158,35545,34953,34380,33825,33288
  };
  `LUTRAM fptx lutd[0:63] = '{
    -14680063,-1048576,-349525,-174763,-104858,-69905,-49932,-37449,-29127,-23302,-19065,-15887,-13444,-11522,-9987,-8738,
    -7710,-6854,-6132,-5518,-4994,-4539,-4144,-3800,-3495,-3226,-2988,-2774,-2582,-2411,-2255,-2114,
    -1986,-1869,-1762,-1665,-1574,-1492,-1415,-1344,-1279,-1218,-1161,-1108,-1060,-1013,-970,-929,
    -892,-856,-822,-791,-761,-733,-706,-681,-657,-634,-613,-592,-573,-555,-537,-520
  };
  logic [5:0] luti; fptx b;
  luti = a[16:11];
  // Base value plus (difference * fraction) >> 11.
  b = lut[luti] + fdu11(lutd[luti],a[10:0]);
  if (chkfpMF2(a)) $write("Function invf OoR %x\n",a);
  return b;
endfunction

// sqrtf: table-based square root of a flt with pre-scaling of small inputs.
// Inputs whose top table-index bits are clear are shifted left by 6, 4, 2 or 0 bits
// before the lookup and the result is shifted right by half that amount afterwards.
// When a[17] is set the table is bypassed and a linear extension from sqrt2 is
// returned instead. The range check used is chkfpMF4, i.e. bits [31:19].
function fptx sqrtf (fptx a);			// square root of flt [0,2]
  `STATIC fptx sqrt2 = 92682;
  `LUTRAM fptx lut[0:63] = '{
        0,11585,16384,20066,23170,25905,28378,30652, 32768,34756,36636,38424,40132,41771,43348,44869,
    46341,47767,49152,50499,51811,53090,54340,55561, 56756,57926,59073,60199,61303,62388,63455,64504,
    65536,66552,67553,68539,69511,70470,71416,72350, 73271,74182,75081,75969,76848,77716,78575,79424,
    80265,81097,81920,82735,83542,84342,85134,85918, 86696,87467,88231,88988,89739,90484,91222,91955
  };
  `LUTRAM fptx lutd[0:63] = '{
    11584,4799,3682,3104,2735,2473,2274,2116, 1988,1880,1788,1708,1639,1577,1521,1472,
    1426,1385,1347,1312,1279,1250,1221,1195,  1170,1147,1126,1104,1085,1067,1049,1032,
    1016,1001,986,972,959,946,934,921,        911,899,888,879,868,859,849,841,
    832,823,815,807,800,792,784,778,          771,764,757,751,745,738,733,727
  };
  logic[2:0] ish; logic[5:0] luti; fptx ash,b,c; logic over;
  ish = (a[16:11]==0)? 6 : (a[16:13]==0)? 4 : (a[16:15]==0)? 2 : 0;
  ash = a<<ish;
  luti = ash[16:11];
  // Base value plus (difference * fraction) >> 11 on the scaled input.
  b = lut[luti] + fdu11(lutd[luti],ash[10:0]);
  over = a[17];
  c = sqrt2 + a[15:2];				// at 2 dsqrt/dx = 1/4
  if (chkfpMF4(a)) $write("Function sqrtf OoR %x\n",a);
  // ish[2:1] is ish/2; '>>' binds tighter than '?:', so only b is shifted.
  return over? c : b >> ish[2:1];
endfunction

// sqrtfx: table-based square root of a flt, without the input scaling or the
// a[17] extension done by sqrtf.
// lut holds 64 base values and lutd the matching first differences; the entry is
// selected by a[16:11] and a[10:0] is the interpolation fraction passed to fdu11.
// The out-of-range message names this function (it previously said "sqrtf", which
// made it indistinguishable from sqrtf's own diagnostic).
function fptx sqrtfx (fptx a);			// square root of flt, range checked by chkfpMF4
  `LUTRAM fptx lut[0:63] = '{
        0,11585,16384,20066,23170,25905,28378,30652, 32768,34756,36636,38424,40132,41771,43348,44869,
    46341,47767,49152,50499,51811,53090,54340,55561, 56756,57926,59073,60199,61303,62388,63455,64504,
    65536,66552,67553,68539,69511,70470,71416,72350, 73271,74182,75081,75969,76848,77716,78575,79424,
    80265,81097,81920,82735,83542,84342,85134,85918, 86696,87467,88231,88988,89739,90484,91222,91955
  };
  `LUTRAM fptx lutd[0:63] = '{
    11584,4799,3682,3104,2735,2473,2274,2116, 1988,1880,1788,1708,1639,1577,1521,1472,
    1426,1385,1347,1312,1279,1250,1221,1195,  1170,1147,1126,1104,1085,1067,1049,1032,
    1016,1001,986,972,959,946,934,921,        911,899,888,879,868,859,849,841,
    832,823,815,807,800,792,784,778,          771,764,757,751,745,738,733,727
  };
  logic[5:0] luti; fptx b;
  luti = a[16:11];
  // Base value plus (difference * fraction) >> 11.
  b = lut[luti] + fdu11(lutd[luti],a[10:0]);
  if (chkfpMF4(a)) $write("Function sqrtfx OoR %x\n",a);
  return b;
endfunction

// invsqrtf: table-based inverse square root of a flt.
// lut holds 64 base values and lutd the matching first differences; the entry is
// selected by a[16:11] and a[10:0] is the interpolation fraction passed to fdu11.
// No input scaling is applied. A message is printed when chkfpMF2(a) reports the
// argument out of range.
function fptx invsqrtf (fptx a);		// inverse square root of flt [0,2]
  `LUTRAM fptx lut[0:63] = '{
    2147483647,370728,262144,214040,185364,165794,151349,140122,131072,123576,117234,111779,107020,102821,99081,95721,
    92682,89915,87381,85051,82897,80899,79039,77302,75674,74146,72706,71347,70061,68842,67685,66585,
    65536,64535,63579,62664,61788,60947,60140,59364,58617,57898,57205,56535,55889,55265,54661,54076,
    53510,52961,52429,51912,51411,50923,50450,49989,49541,49104,48679,48265,47861,47467,47082,46707
  };
  `LUTRAM fptx lutd[0:63] = '{
    -2147112919,-108584,-48104,-28676,-19570,-14445,-11227,-9050,-7496,-6342,-5455,-4759,-4199,-3740,-3360,-3039,
    -2767,-2534,-2330,-2154,-1998,-1860,-1737,-1628,-1528,-1440,-1359,-1286,-1219,-1157,-1100,-1049,
    -1001,-956,-915,-876,-841,-807,-776,-747,-719,-693,-670,-646,-624,-604,-585,-566,
    -549,-532,-517,-501,-488,-473,-461,-448,-437,-425,-414,-404,-394,-385,-375,-366
  };
  logic[5:0] luti; luti = a[16:11];
  if (chkfpMF2(a)) $write("Function invsqrtf OoR %x\n",a);
  // Base value plus (difference * fraction) >> 11.
  return lut[luti] + fdu11(lutd[luti],a[10:0]);
endfunction

function fptx sincf (fptx a);			// sine of flt: three-stage lookup with quadrant offset 0
  fptx prep; uinty mag;
  prep = sincos_part1(a,0);
  mag = sincos_part2(prep);
  sincf = sincos_part3(mag);
endfunction

function fptx coscf (fptx a);			// cosine of flt: three-stage lookup with quadrant offset 1
  fptx prep; uinty mag;
  prep = sincos_part1(a,1);
  mag = sincos_part2(prep);
  coscf = sincos_part3(mag);
endfunction

// sincos_part1: first stage of the sine/cosine lookup.
// Only a[15:0] is used: a[15:14] plus qadd gives the quadrant, a[13:8] the table
// index and a[7:0] the interpolation fraction. qadd has no type of its own and so
// takes the type of the preceding argument (fptx); sincf passes 0 and coscf passes 1.
// Returns {slope, quad, luti, b} for sincos_part2, where slope = lutd[luti].
function fptx sincos_part1 (fptx a, qadd);	// prep for sincos LUT
  `LUTRAM uintx lutd[0:63] = '{
    1608,1608,1605,1603,1598,1594,1588,1581, 1574,1565,1555,1545,1533,1521,1508,1494,
    1478,1462,1446,1427,1410,1389,1370,1348, 1326,1304,1280,1256,1230,1205,1179,1151,
    1123,1095,1065,1036,1005,974,942,910,    877,844,810,776,740,706,670,633,
    598,560,523,486,448,410,372,333,         294,256,216,177,138,99,59,20
  };
  logic[1:0] quad; logic[5:0] luti; logic[7:0] b;
  quad = a[15:14]+qadd;
  // In odd quadrants the index and the fraction are both complemented (table read backwards).
  luti = quad[0]? ~a[13:8] : a[13:8];
  b    = quad[0]? ~a[7:0]  : a[7:0];
  return {lutd[luti],quad,luti,b};
endfunction

// sincos_part2: second stage of the sine/cosine lookup.
// Takes the word packed by sincos_part1 ({slope, quad, luti, b}) and returns
// {neg, c}: c is the 17-bit sum lut[luti] + (b * slope) >> 8 and neg is quad[1].
// NOTE(review): mir (quad[0]) is assigned but never read in this function.
function uinty sincos_part2 (fptx a);		// sine of flt by LUT
  `LUTRAM uintx lut[0:63] = '{
    0,1608,3216,4821,6424,8022,9616,11204,           12785,14359,15924,17479,19024,20557,22078,23586,
    25080,26558,28020,29466,30893,32303,33692,35062, 36410,37736,39040,40320,41576,42806,44011,45190,
    46341,47464,48559,49624,50660,51665,52639,53581, 54491,55368,56212,57022,57798,58538,59244,59914,
    60547,61145,61705,62228,62714,63162,63572,63944, 64277,64571,64827,65043,65220,65358,65457,65516
  };
  logic neg,mir; logic [5:0] luti; logic [7:0] b; uintx lutd_; logic[23:0] d; logic[16:0] c;
  neg = a[15];
  mir = a[14];
  luti = a[13:8];
  b = a[7:0];
  lutd_ = a[31:16];
  // 8-bit fraction times 16-bit slope; the top 16 bits of the 24-bit product are added.
  d = b * lutd_;
  c = lut[luti] + d[23:8];
  return {neg,c};
endfunction

function fptx sincos_part3 (uinty a);		// final sincos stage: apply the sign flag to the magnitude
  inty mag;
  // a[16:0] is the magnitude from sincos_part2, a[17] its negate flag.
  mag = a[16:0];
  sincos_part3 = a[17]? -mag : mag;
endfunction

// Placeholder: prints a notice that sincoscf must be used as a task, and returns zero.
function CxFptx sincoscf (fptx a);
  sincoscf = 0;
  $write("sincoscf is a task that should never be called as a function\n");
endfunction
// Placeholder: prints a notice that sincoscfx must be used as a task, and returns zero.
function CxFptx sincoscfx (fptx a);
  sincoscfx = 0;
  $write("sincoscfx is a task that should never be called as a function\n");
endfunction
// Placeholder: prints a notice that pol2recf must be used as a task, and returns zero.
function CxFptx pol2recf (fptx a, fptx b);
  pol2recf = 0;
  $write("pol2recf is a task that should never be called as a function\n");
endfunction

function fptx atan2cf (fptx y, fptx x);		// atan2 of flt: fold/scale stage followed by the table stage
  fptx folded;
  folded = atan2cf_pre(y,x);
  atan2cf = atan2cf_post(folded);
endfunction

// atan2cf_pre: first stage of atan2cf.
// Builds a 2-bit quadrant code from bit 17 of y and x, swaps/negates the inputs
// according to that code into two 16-bit values, then shifts both left by a common
// amount (6, 4, 2 or 0) chosen from how many of their top bits are clear.
// Returns {qys[15:1], quad[1], qxs[15:1], quad[0]} for atan2cf_post; the low bit of
// each scaled value is replaced by one quadrant bit.
function fptx atan2cf_pre (fptx y, fptx x);
  logic[1:0] quad; logic[15:0] qx,qy; logic[2:0] ish; logic[15:0] qxs,qys;
  quad = {y[17],x[17]};	// quad is 0,1,3,2
  // The selected values are truncated to 16 bits on assignment.
  qx = (quad==3)? -y : (quad==2)?  x : (quad==1)?  y : x;
  qy = (quad==3)? -x : (quad==2)? -y : (quad==1)? -x : y;
  ish = (qx[15:10]==0 && qy[15:10]==0)? 6 : (qx[15:12]==0 && qy[15:12]==0)? 4 : (qx[15:14]==0 && qy[15:14]==0)? 2 : 0;
  qxs = qx<<ish;
  qys = qy<<ish;
  return {qys[15:1],quad[1],qxs[15:1],quad[0]};
endfunction

// atan2cf_post: second stage of atan2cf.
// Unpacks the word built by atan2cf_pre: quadrant bits from qxy[16] and qxy[0],
// x from qxy[15:0] and y from qxy[31:16]. The table index is {y[15:13],x[15:13]}
// (an 8x8 grid); lut gives the base value and lutdx / lutdy the first differences
// along x and y, interpolated with the low 13 bits of each through idu13.
// The result is {quad, b[13:0]} marked signed and extended to a flt.
function fptx atan2cf_post (fptx qxy);
  `LUTRAM intx lut[0:63] = '{
    16384,0,0,0,0,0,0,0,                          16384,8192,4836,3356,2555,2059,1723,1480,
    16384,11548,8192,6133,4836,3969,3356,2903,    16384,13028,10251,8192,6712,5637,4836,4223,
    16384,13829,11548,9672,8192,7038,6133,5415,   16384,14325,12415,10747,9346,8192,7246,6469,
    16384,14661,13028,11548,10251,9138,8192,7391, 16384,14904,13481,12161,10969,9915,8993,8192
  };
  `LUTRAM intx lutdx[0:63] = '{
    0,0,0,0,0,0,0,0,                              -8192,-3356,-1480,-801,-496,-336,-243,-183,
    -4836,-3356,-2059,-1297,-867,-613,-453,-348,  -3356,-2777,-2059,-1480,-1075,-801,-613,-481,
    -2555,-2281,-1876,-1480,-1154,-905,-718,-579, -2059,-1910,-1668,-1401,-1154,-946,-777,-643,
    -1723,-1633,-1480,-1297,-1113,-946,-801,-679, -1480,-1423,-1320,-1192,-1054,-922,-801,-694
  };
  `LUTRAM intx lutdy[0:63] = '{
    0,8192,4836,3356,2555,2059,1723,1480, 0,3356,3356,2777,2281,1910,1633,1423,
    0,1480,2059,2059,1876,1668,1480,1320, 0,801,1297,1480,1480,1401,1297,1192,
    0,496,867,1075,1154,1154,1113,1054,   0,336,613,801,905,946,946,922,
    0,243,453,613,718,777,801,801,        0,183,348,481,579,643,679,694
  };
  logic[1:0]quad; logic[15:0] x,y; logic[5:0] luti; intx b; fptx c;
  quad={qxy[16],qxy[0]};
  // Bit 0 of x and of y here is a quadrant bit, not data; it is part of the
  // 13-bit fractions passed to idu13 below.
  x=qxy[15:0]; y=qxy[31:16];
  luti = {y[15:13],x[15:13]};
  b = lut[luti] + idu13(lutdx[luti],x[12:0]) + idu13(lutdy[luti],y[12:0]);
  // The quadrant supplies the top two bits of the 16-bit signed result.
  c = $signed({quad,b[13:0]});
  return c;
endfunction

// powf: table lookup indexed by a[15:10]; the selected 18-bit entry is returned as a flt.
// NOTE(review): argument b is never read, and no interpolation is applied — the
// result depends only on six bits of a. Confirm whether this is a placeholder.
function fptx powf (fptx a, fptx b);		// power of flt
  `LUTRAM uinty lut[0:63] = '{
    8192,11585,14188,16384,18317,20066,21673,23170,24576,25905,27169,28377,29536,30651,31727,32768,
    33776,34755,35708,36635,37540,38423,39287,40132,40960,41771,42566,43347,44115,44869,45611,46340,
    47059,47767,48464,49152,49829,50498,51159,51810,52454,53090,53718,54339,54953,55560,56161,56755,
    57344,57926,58502,59073,59638,60198,60753,61303,61848,62388,62923,63454,63981,64503,65021,65536
  };
  logic[5:0] luti; fptx c;
  luti = a[15:10];
  c = lut[luti];
  return c;
endfunction

// log10f: table-based log10 of a flt.
// lut holds 64 base values and lutd the matching first differences; the entry is
// selected by a[16:11] and a[10:0] is the interpolation fraction passed to fdu11.
// No input scaling and no range check are applied; bits of a above 16 are ignored.
function fptx log10f (fptx a);			// log10 of flt
  `LUTRAM fptx lut[0:63] = '{
    -21188196,-98642,-78913,-67373,-59185,-52834,-47645,-43257,-39457,-36104,-33106,-30393,-27916,-25638,-23529,-21565,
    -19728,-18003,-16376,-14837,-13377,-11989,-10664,-9399,-8188,-7026,-5910,-4836,-3801,-2802,-1837,-904,
    0,876,1725,2551,3352,4132,4891,5631,6351,7054,7740,8409,9064,9703,10329,10941,
    11540,12127,12702,13266,13818,14361,14893,15415,15928,16432,16927,17413,17891,18362,18825,19280
  };
  `LUTRAM fptx lutd[0:63] = '{
    21089554,19729,11540,8188,6351,5189,4388,3800,3353,2998,2713,2477,2278,2109,1964,1837,
    1725,1627,1539,1460,1388,1325,1265,1211,1162,1116,1074,1035,999,965,933,904,
    876,849,826,801,780,759,740,720,703,686,669,655,639,626,612,599,
    587,575,564,552,543,532,522,513,504,495,486,478,471,463,455,448
  };
  logic[5:0] luti; luti = a[16:11];
  // Base value plus (difference * fraction) >> 11.
  return lut[luti] + fdu11(lutd[luti],a[10:0]);
endfunction

/*
function fptx ftmp (fptx a);			// template for fptx function
  `LUTRAM fptx lut[0:63] = '{
  };
  `LUTRAM fptx lutd[0:63] = '{
  };
  logic[5:0] luti = a[16:11];
  return lut[luti] + fdu11(lutd[luti],a);
endfunction
*/
