package nxm.ice.lib;

import nxm.ice.lib.IceHW;

/* 
   System Register Write
   RCBELAAA
   AAA - address in 128b words
     L - len in 128b quads
     E - last byte enable (for dual data cycle)
     B - last quad word enable or byte enable for config commands
     C - command bits {cfg,cyc,QvC,RvW}  or   4=wC 5=rC 6=wQ 7=rQ 8=wCfg 9=rCfg  read/write Completer/reQuester
     R - reset [28]
     A - ack   [29]
     T - XIO mode [30]
     P - Peek mode [31]

   System Status Register Read
    FEDCBBAA
    AA  - 7:user_lnk_up 6:getbusy 5:0:ltssm
    BB  - 15:8 lstat
    C   - 19:16 fstat
    D   - 23:20 xadr 
    E   - 27:24 lnk speed
    F   - 31:28 lnk width

   System Completer Queue Bits
     [15:0] addr
     [23:16] len
     [27:24] seq
     28 - Read
     29 - Write
     30 - non-Zero data

    NVME Controller Memory Map
     0x0000 CAP	Controller Capabilities
     0x0008 VS	Version
     0x000C 	Interrupt Mask Set
     0x0010 	Interrupt Mask Clear
     0x0014 CC	Controller Configuration 0=ena
     0x0018	Reserved
     0x001C CSTS Controller Status Bits 0:rdy 1:fail 3:2=shutdown
     0x0020 NSSR Subsystem Reset
     0x0024 AQA	Admin Queue Attr 11:0=SQ-Size 27:16=CQ-Size
     0x0028 ASQ	Admin Submission Queue Base Addr
     0x0030 ACQ	Admin Completion Queue Base Addr
     0x1000	Submission Queue 0 Tail Doorbell (Admin)
     0x1004	Completion Queue 0 Head Doorbell (Admin)
     0x1008	Submission Queue 1 Tail Doorbell
     0x100C	Completion Queue 1 Head Doorbell

  MSI-X
    The Intel drives all have table offset=0x2000 and PBA offset=0x3000.
    The 1172:8888 function is set to match this, so the memory write-through handles all setup from the host.

  BRAM Address
    Bits 19:16 are OR'ed into bits 11:8, so that 4KB page boundaries alias onto the smaller 256B boundaries
    
 */
public class IceNVME extends IceJVM {

  // Map the Controller 8K Requester BRAM 
  // (DSCR and DBUF are the same address: the scratch space starts with the command buffer)
  public final static int DSCR=0x800000;	// 8K Scratch Space 
  public final static int DBUF=0x800000;	// Device command buffer		(Memory Mapped)
  public final static int DBUFX=0x800010;	// Device read buffer data		(Memory Mapped)

  // Map the Controller 8K Completer BRAM 
  // The low 16 bits of these addresses are what gets handed to the NVMe device
  // (see the BCQ1&0xFFFF / BSQ1&0xFFFF uses in configController).
  public final static int BSCR=0x810000;	// 2K-256-16 Scratch
  public final static int BMGM=0x8106F0;	// 16 Management List
  public final static int BPGL=0x810700;	// 256 PGP List (presumably the NVMe PRP list - confirm)
  public final static int BCQ0=0x810800;	// 256 Admin Cpl Queue
  public final static int BCQ1=0x810900;	// 256 IO Cpl Queue
  public final static int BSQ0=0x810A00;	// 512 Admin Sub Queue
  public final static int BSQ1=0x810C00;	// 1K  IO Sub Queue
  public final static int BBUF=0x811000;	// 4K  Data Buffer

  // Values written as the upper address dword of data pointers
  public final static int XMEM=0x002000;	// external memory
  public final static int XPGP=0x004000;	// external PGP List

  public final static int SQS0=8,  CQS0=16;	// Admin Queue Slots
  public final static int SQS1=16, CQS1=16;	// IO Queue Slots

  public final static int DCFG=0x018000;	// Device root port configuration bus	(System Bus)
  public final static int DCMD=0x014000;	// Device command word, status read	(System Bus) 

  public final static int CF_SLOTS=0x020008;	// Command Fifo slots address
  public final static int CF_SNIFF=0x020000;	// Command Fifo sniff address
  public final static int CF_XFER =0x020010;	// Command Fifo read|write address

  public final static int R_STATE=0x60;		// Current state
  public final static int R_STATUS=0x64;	// Current status
  public final static int R_QIO=0x68;		// Current queue out|in
  public final static int R_QDIF=0x6C;		// Current queue mdif|dif
  public final static int Q_WRB=0x20000000;	// Completer queue bit 29 - Write
  public final static int Q_RDB=0x10000000;	// Completer queue bit 28 - Read

  // DCMD command-word bits, nibble C of RCBELAAA = {cfg,cyc,QvC,RvW} (bits 27:24)
  public final static int CMD_RVW=0x01000000, CMD_RVC=0x02000000, CMD_CYC=0x04000000, CMD_CFG=0x08000000; 
  // DCMD control bits: reset [28] and ack [29]
  public final static int CMD_RST=0x10000000, CMD_ACK=0x20000000; 

  // Written as the "IDen:CompleterID:Tag" dword of every request descriptor
  public final static int CIDTAG=0x01000000;

  // Opcodes passed to submitQueue0 (admin queue)
  public final static int OPCODE_DELETE_SQ=0x00;
  public final static int OPCODE_CREATE_SQ=0x01;
  public final static int OPCODE_DELETE_CQ=0x04;
  public final static int OPCODE_CREATE_CQ=0x05;
  public final static int OPCODE_IDENTIFY=0x06;
  public final static int OPCODE_SET_FEATURE=0x09;

  // Opcodes passed to submitQueue1 (IO queue)
  public final static int OPCODE_FLUSH=0x0000;
  public final static int OPCODE_WRITE=0x0001;
  public final static int OPCODE_READ =0x0002;
  public final static int OPCODE_MANAGE=0x0009;

  // OR'ed into DW0 of an IO command when the data pointer is an SGL rather than PRPs
  public final static int OPCODE_USESGL=0x4000;

  // Software copies of the queue pointers: 0 = admin queue pair, 1 = IO queue pair.
  // The completion heads carry one extra bit above the slot index as the phase (see handleCompletions).
  static int cqhead0,cqhead1,sqtail0,sqtail1;
  // requests is cleared by enableController; ttt is only read (by the command(1) dump) in this file
  static int requests,ttt;
  // 0 or OPCODE_USESGL, chosen by identify(1,...) from the controller's identify data
  static int useSGL;

  /**
   * Firmware main loop; never returns.
   *
   * Startup: zeroes local memory words 0x00..0xFC (writing the offset to the
   * heartbeat word after each one), resets the FIFOs, writes the idle marker
   * 0x1ce00000 to word 0x18 and clears useSGL.
   *
   * Each pass of the loop then, in this order:
   *   1. writes the beat counter to HBT and the raw DCMD status to word 0x10;
   *   2. runs boot() if idle (state 0) and status bit 7 (link up) is set, or
   *      drops back to state 0 if the link is down;
   *   3. calls processCmdFifo() when the command FIFO holds at least 4 words;
   *   4. reads the low byte of word 0x18 as a command tag: 99 triggers
   *      fullReset(), any other non-zero value goes to command(); the word
   *      is then restored to 0x1ce00000;
   *   5. every 0x1000 beats writes a summary to word 0x1C: status[15:0],
   *      state in bits 16 and up, and (when state > 0) the device register at
   *      offset 0x1C shifted into bits 31:24;
   *   6. polls the IO completion queue.
   */
  public static void run() {
    int state = 0;

    for (int off=0; off<0x100; off+=4) {
      awMem(off,0);
      awMem(HBT,off);			// bootup heartbeat monitor
    }

    resetFIFOs();
    awMem(0x18,0x1ce00000);
    useSGL = 0;

    int beat = 0x1CE00000;
    while (true) {
      awMem(HBT,beat);

      int status = arBus(DCMD);
      boolean linkup = (status&0x80) != 0;
      awMem(0x10,status);

      if (state!=0) {
        if (!linkup) state = 0;		// link down
      }
      else if (linkup) {		// link up
        awMem(HBT,0x1CE0B007);
        state = boot();
      }

      // command FIFO
      if ((arBusSync(CF_SLOTS) & 0xFFF) >= 4) processCmdFifo();

      // command TAG
      int tag = arMemSync(0x18) & 0xFF;
      if (tag!=0) {
        if (tag==99) state = fullReset();
        else command(tag);
        awMem(0x18,0x1ce00000);
      }

      // status reporting
      if ((beat&0xFFF)==0) {
        int report = (status&0xFFFF) | (state<<16);
        if (state>0) report |= (rdmem(0x1C)<<24);
        awMem(0x1C,report);
      }

      handleCompletions1();
      beat++;
    }
  }

  /**
   * Polls the DCMD status word until bit 17 (mask 0x20000, inside the fstat
   * field 19:16) is set, or until the budget runs out.
   *
   * @param timeout maximum number of polls; udelay(1000) is called after each
   *                unsuccessful poll. Values <= 0 return immediately.
   */
  public static void waitOnFI ( int timeout) {
    for (int left=timeout; left>0; left--) {
      if ((arBus(DCMD)&0x20000)!=0) return;
      udelay(1000);
    }
  }

  /**
   * Polls the DCMD status word until bit 18 (mask 0x40000, inside the fstat
   * field 19:16) is set, or until the budget runs out.
   *
   * @param timeout maximum number of polls; udelay(1000) is called after each
   *                unsuccessful poll. Values <= 0 return immediately.
   */
  public static void waitOnFO ( int timeout) {
    for (int left=timeout; left>0; left--) {
      if ((arBus(DCMD)&0x40000)!=0) return;
      udelay(1000);
    }
  }

  /**
   * Pulses a full reset through the DCMD register: writes the reset bit [28]
   * together with bit 30 (the "tst" bit that also resets the PCIE core), then
   * writes zero to let everything run again.
   *
   * @return always 0, which the main loop stores as its new (idle) state
   */
  public static int fullReset() {
    int resetWord = CMD_RST | 0x40000000;	// = 0x50000000, reset + tst bit
    awBus(DCMD,resetWord);
    awBus(DCMD,0);			// run
    return 0;
  }

  /**
   * Pulses the DCMD reset bit [28] on its own (no tst bit) to reset the
   * FIFOs, then writes zero to release it.
   */
  public static void resetFIFOs () {
    awBus(DCMD,CMD_RST);	// assert reset (0x10000000)
    awBus(DCMD,0);		// release
  }

  /**
   * Brings the link to the device up after reset.
   *
   * Resets the FIFOs, clears the first 0x40 words of the requester scratch,
   * then repeats configRoot() / configTarget() until a read of device
   * register offset 0 returns something non-zero, for at most 7 attempts.
   *
   * @return the number of attempts made plus one (2..8). 8 is also what comes
   *         back when every attempt read zero, so the value alone does not
   *         distinguish a 7th-try success from failure.
   */
  public static int boot() {
    resetFIFOs();
    zeroMem(DSCR,0x40);

    int attempt = 1;
    int id = 0;
    while (id==0 && attempt<8) {
      configRoot();
      udelay(100);
      configTarget();
      id = rdmem(0);		// is the card reading a valid ID ?
      attempt++;
    }
    return attempt;
  }

  /**
   * Executes one debug/maintenance command selected by the host.
   * Dump commands place their output in local memory starting at 0x0800.
   *
   *    1  dump requester buffer (DBUF); 0xE0..0xEF = DCMD status, 0xF0.. = ttt,
   *       then 0xF8/0xFC overwritten with cqhead0/cqhead1
   *    2  dump device registers 0x000..0x0FF, with 0x1000.. at 0xE0 and 0x2000.. at 0xF0
   *    3  dump target config space (rdcfg)      4  dump root port config space (rdcfgl)
   *    5  read system bus 0x00040000 64 times   6  enableController + configController
   *    7  store DCMD status                     8  identify controller
   *    9  disableController
   *   10  admin CQ      11,12  admin SQ (256B pages)
   *   20  IO CQ         21..24 IO SQ (256B pages)
   *   30..33 identify variants                  40  capabilities
   *   98  boot
   * Unknown values do nothing.
   *
   * @param test command number
   */
  public static void command (int test) {
    int off;

    // Queue dumps all share one loop; pick the 256-byte window to copy.
    int base = 0;
    if      (test==10) base = BCQ0;
    else if (test==11) base = BSQ0;
    else if (test==12) base = BSQ0+0x100;
    else if (test==20) base = BCQ1;
    else if (test==21) base = BSQ1;
    else if (test==22) base = BSQ1+0x100;
    else if (test==23) base = BSQ1+0x200;
    else if (test==24) base = BSQ1+0x300;

    if (base!=0) {
      for (off=0; off<0x100; off+=4) awMem(0x0800+off,arMem(base+off));
    }
    else if (test==1) {
      for (off=0; off<0x100; off+=4) {
        int val;
        if (off>=0xF0)      val = ttt;
        else if (off>=0xE0) val = arBus(DCMD);
        else                val = arMem(DBUF+off);
        awMem(0x0800+off,val);
      }
      awMem(0x08F8,cqhead0);
      awMem(0x08FC,cqhead1);
    }
    else if (test==2) {
      for (off=0; off<0x100; off+=4) awMem(0x0800+off,rdmem(off));
      for (off=0; off<0x010; off+=4) awMem(0x08E0+off,rdmem(0x1000+off));
      for (off=0; off<0x010; off+=4) awMem(0x08F0+off,rdmem(0x2000+off));
    }
    else if (test==3) {
      for (off=0; off<0x100; off+=4) awMem(0x0800+off,rdcfg(off));
    }
    else if (test==4) {
      for (off=0; off<0x100; off+=4) awMem(0x0800+off,rdcfgl(off));
    }
    else if (test==5) {
      for (off=0; off<0x100; off+=4) awMem(0x0800+off,arBus(0x00040000));
    }
    else if (test==6) {
      enableController();
      configController();
    }
    else if (test==7) awMem(0x0800,arBus(DCMD));
    else if (test==8 || test==31) identify(1,512,0);
    else if (test==9) disableController();
    else if (test==30) identify(0,512,0);
    else if (test==32) identify(2,512,0);
    else if (test==33) identify(1,512,512);
    else if (test==40) capabilities();
    else if (test==98) boot();
  }

  /**
   * Issues an admin Identify command and copies part of the result to local
   * memory at 0x0800.
   *
   * The command uses namespace 1 when space is 0 and namespace 0 otherwise,
   * and targets the data buffer (address 0x1000 as seen by the device).
   * For space 1 the word at byte 536 of the returned data is inspected: if
   * either of its low two bits is set, useSGL becomes OPCODE_USESGL, else 0.
   * The chosen value is also stored into the buffer at byte 316 so it is
   * visible in the copied data.
   * NOTE(review): byte 536 is presumably the SGL Support field of the
   * Identify Controller structure - confirm against the NVMe spec.
   *
   * @param space value placed in command dword 10 (selects what to identify)
   * @param len   number of bytes to copy out
   * @param off   byte offset into the data buffer to start copying from
   */
  public static void identify (int space, int len, int off) {
    int nsid = 0;
    if (space==0) nsid = 1;

    submitQueue0(OPCODE_IDENTIFY,nsid,0x1000,space,0x00);

    if (space==1) {
      int sgls = arMem(BBUF+536);
      if ((sgls&0x03)!=0) useSGL = OPCODE_USESGL;
      else                useSGL = 0x0;
      awMem(BBUF+316,useSGL);
    }
    copyMem(BBUF+off,0x0800,len);
  }

  /**
   * Pops one four-word request from the command FIFO and executes it.
   *
   * Word 0 is the command tag, followed by the block count and the lower and
   * upper halves of the starting block. Tag 0x1CE0003D is a read (dir -1) and
   * 0x1CE0003E a write (dir +1); anything else is treated as a framing error
   * and the FIFO is resynchronised with cleanupCmdFifo().
   * (The 0x1CE0003C / 0x1CE0003F data-management tags are not handled; their
   * handler, dmBlocks, is commented out in this file.)
   */
  public static void processCmdFifo () {
    int tag   = arBus(CF_XFER);
    int count = rBus();
    int lower = rBus();
    int upper = rBus();

    int dir = 0;
    if (tag==0x1CE0003D)      dir = -1;
    else if (tag==0x1CE0003E) dir = 1;

    if (dir!=0) rwBlocks(dir,count,lower,upper);
    else        cleanupCmdFifo();
  }

  /**
   * Resynchronises the command FIFO after an unrecognised tag.
   *
   * While the FIFO is not empty, peeks at the next word through CF_SNIFF:
   * if it is a known read/write tag the FIFO is left positioned there,
   * otherwise the word is discarded by reading CF_XFER.
   */
  public static void cleanupCmdFifo () {
    int pending = arBusSync(CF_SLOTS)&0xFFF;
    while (pending!=0) {
      int head = arBus(CF_SNIFF);
      boolean known = (head==0x1CE0003D) || (head==0x1CE0003E);
      if (known) return;
      arBus(CF_XFER);			// drop the stray word
      pending = arBusSync(CF_SLOTS)&0xFFF;
    }
  }

  /**
   * Submits one IO read or write and blocks until the IO completion queue
   * head moves.
   *
   * NOTE(review): the wait has no timeout - if the device never posts a
   * completion this spins forever (unlike submitQueue0, which gives up).
   *
   * @param dir    > 0 for write, otherwise read
   * @param blocks number of blocks to transfer
   * @param blockl lower 32 bits of the starting block
   * @param blocku upper 32 bits of the starting block
   */
  public static void rwBlocks (int dir, int blocks, int blockl, int blocku) {
    int opcode;
    if (dir>0) opcode = OPCODE_WRITE;
    else       opcode = OPCODE_READ;

    int before = cqhead1;
    submitQueue1(opcode,useSGL,XMEM,blocku,blockl,blocks);
    while (cqhead1==before) {
      handleCompletions1();
    }
  }

  // data management hints for performance
/*
  public static void dmBlocks (int dir, int cmds, int blockl, int blocku) {
    int cur=cqhead1;
    int blocks = cmds>>16;
    int cmd = (cmds>>24)&0xFF;
    int op,ctx;
    if (blocks<4) return;
    maddr = BMGM;
    if (dir>0) {	// create MGM List
      if (cmd==0x41) { ctx=0x00000130; op=1; }
      else           { ctx=0x00000630; op=2; }
      mdata = ctx;
      mdata = blocks;
      mdata = blockl;
      mdata = blocku;
    } else {		// remove MGM List
      op = 0;
      mdata = 0;
    }
    submitQueue1 (OPCODE_MANAGE,0,BMGM&0xFFFF,op,0,1);
    while (cqhead1==cur) handleCompletions1();
  }
*/

  /**
   * Reads a dword from the local root port configuration bus.
   * The location is read twice and only the second value is returned.
   * NOTE(review): presumably the first access primes the read - confirm.
   *
   * @param adr byte offset within the DCFG window
   * @return the value from the second read
   */
  public static int rdcfgl (int adr) {
    int target = DCFG+adr;
    arBus(target);			// first result is discarded
    return arBus(target);
  }

  /**
   * Writes a dword to the local root port configuration bus.
   *
   * @param adr byte offset within the DCFG window
   * @param val value to write
   * @return val, unchanged (nothing is read back)
   */
  public static int wrcfgl (int adr, int val) {
    int target = DCFG+adr;
    awBus(target,val);
    return val;
  }

  /**
   * Clears out any response left over before a new request is issued.
   *
   * Selects DCMD once, then for as long as status bit 14 (ravail) reads as
   * set, writes 0x07F05000 and waits udelay(1). Gives up silently after 1000
   * polls, so ravail may still be set on return.
   * NOTE(review): presumably the write consumes one pending response without
   * keeping its data - confirm against the command word layout.
   */
  public static void clearresp() {
    aBus(DCMD);		// ravail is status[14]
    for (int i=0; i<1000 && (rBus()&0x4000)!=0; i++) {
      wBus(0x07F05000); udelay(1);
    }
  }

  /**
   * Busy-waits while status bit 6 (rbusy) is set, for at most 1000 polls of
   * the DCMD status word. Returns without any indication if the bit is still
   * set when the poll budget runs out.
   */
  public static void wait4resp() {
    aBus(DCMD);		// rbusy is status[6]
    // empty loop body: the poll itself is the work
    for (int i=0; i<1000 && (rBus()&0x40)!=0; i++);
  }

  /**
   * Waits for a response and returns its first data word.
   *
   * Polls DCMD up to 1000 times for status bit 14 (ravail), then writes
   * 0x07F05001 and reads one word from the read buffer at DBUFX.
   * The poll result is not checked: on timeout the command is still written
   * and whatever DBUFX holds is returned.
   *
   * @return the word read from DBUFX
   */
  public static int getresp() {
    aBus(DCMD);		// ravail is status[14]
    // empty loop body: spin until a response is available or the budget runs out
    for (int i=0; i<1000 && (rBus()&0x4000)==0; i++);
    wBus(0x07F05001); 
    maddr = DBUFX;
    return mdata;
  }

  /**
   * Reads a dword from the target device's configuration space.
   *
   * Clears any stale response, builds a four-dword request descriptor in the
   * command buffer (consecutive mdata writes starting at DBUF), issues it
   * through DCMD and waits for the response.
   *
   * @param adr configuration space byte offset
   * @return the value delivered by getresp()
   */
  public static int rdcfg (int adr) {
    clearresp();
    maddr = DBUF;
    mdata = adr; 	// addr[31:0]
    mdata = 0x00000000;	// addr[63:32]
    mdata = 0x00004001;	// RequesterID:ReqType:Count
    mdata = CIDTAG;	// IDen:CompleterID:Tag
    awBus(DCMD,0x06F01000); 
    return getresp();
  }

  /**
   * Writes a dword to the target device's configuration space.
   *
   * Builds a request descriptor followed by the data word in the command
   * buffer, issues it through DCMD and waits for the response.
   * NOTE(review): unlike rdcfg/rdmem this does not call clearresp() first,
   * so a stale response could be picked up by getresp() - confirm intended.
   *
   * @param adr configuration space byte offset
   * @param val value to write
   * @return the value delivered by getresp()
   */
  public static int wrcfg (int adr, int val) {
    maddr = DBUF;
    mdata = adr; 	// addr[31:0]
    mdata = 0x00000000;	// addr[63:32]
    mdata = 0x00005001;	// RequesterID:ReqType:Count
    mdata = CIDTAG;	// IDen:CompleterID:Tag
    mdata = val;
    awBus(DCMD,0x06102001); 
    return getresp();
  }

  /**
   * Reads a dword from the device's memory-mapped registers.
   *
   * The offset is OR'ed into 0x1ce10000, the base that configTarget()
   * programs into the device's Base Addr 0. Clears any stale response,
   * builds the request descriptor, issues it and waits for the response.
   *
   * @param adr register byte offset (OR'ed, not added, into the base)
   * @return the value delivered by getresp()
   */
  public static int rdmem (int adr) {
    clearresp();
    maddr = DBUF;
    mdata = 0x1ce10000|adr; // addr[31:0]
    mdata = 0x00000000;	// addr[63:32]
    mdata = 0x00000001;	// RequesterID:ReqType:Count  - must be 8by read or no return
    mdata = CIDTAG;	// IDen:CompleterID:Tag
    awBus(DCMD,0x06F01000); 
    return getresp();
  }

  /**
   * Writes one dword to the device's memory-mapped registers.
   *
   * The offset is OR'ed into 0x1ce10000, the base that configTarget()
   * programs into the device's Base Addr 0. The request is issued through
   * DCMD and no response is waited for.
   *
   * @param adr  register byte offset (OR'ed, not added, into the base)
   * @param val1 value to write
   */
  public static void  wrmem (int adr, int val1) {
    maddr = DBUF;
    mdata = 0x1ce10000|adr; // addr[31:0]
    mdata = 0x00000000;	// addr[63:32]
    mdata = 0x00000801;	// RequesterID:ReqType:Count
    mdata = CIDTAG;	// IDen:CompleterID:Tag
    mdata = val1;
    awBus(DCMD,0x06102001); 
  }

  /**
   * Writes two consecutive dwords (one 64-bit register) to the device's
   * memory-mapped registers in a single request.
   *
   * Note the argument order: the upper word comes first in the signature but
   * the lower word is placed first in the payload.
   *
   * @param adr  register byte offset (OR'ed into the 0x1ce10000 base)
   * @param valu upper 32 bits, written second
   * @param vall lower 32 bits, written first
   */
  public static void wrmemx (int adr, int valu, int vall) {
    maddr = DBUF;
    mdata = 0x1ce10000|adr; // addr[31:0]
    mdata = 0x00000000;	// addr[63:32]
    mdata = 0x00000802;	// RequesterID:ReqType:Count
    mdata = CIDTAG;	// IDen:CompleterID:Tag
    mdata = vall;
    mdata = valu;
    awBus(DCMD,0x06382001); 
  }

  /**
   * Sends a message request carrying one data dword.
   *
   * Builds a descriptor with a zero address, the message type placed in bits
   * 15:8 of the fourth dword, and val as payload, then issues it through
   * DCMD. No response is waited for.
   *
   * @param type message type code (shifted left by 8 into the descriptor)
   * @param val  payload dword
   * @return val, unchanged
   */
  public static int sndmsgD (int type, int val) {
    maddr = DBUF;
    mdata = 0x00000000;	// addr[31:0]
    mdata = 0x00000000;	// addr[63:32]
    mdata = 0x00006001;	// RequesterID:ReqType:Count
    mdata = 0x01040000|(type<<8);// IDen:MsgType:Tag
    mdata = val;
    awBus(DCMD,0x06102001); 
    return val;
  }

  /**
   * Programs the local root port's configuration registers through the DCFG
   * window. The writes are issued in a fixed order, starting with the bus
   * number register.
   * NOTE(review): offsets 0x04 and 0x18..0x30 look like the standard PCI
   * bridge header (command, bus numbers, memory/prefetch windows) - confirm
   * the individual values against the root port core documentation.
   */
  public static void configRoot () {
    wrcfgl(0x18,0x00010100);	// set bus number 
    wrcfgl(0x04,0x80000006);
    wrcfgl(0x20,0x40001000);
    wrcfgl(0x24,0xFFFF0000);
    wrcfgl(0x28,0x00000001);
    wrcfgl(0x2C,0x00000002);
    wrcfgl(0x30,0x80000001);
  }

  /**
   * Programs the target device's configuration space.
   *
   * Writes 0xFFFF0000 to offset 0x04 and all-ones to offsets 0x10..0x1C,
   * then sets Base Addr 0 to 0x1ce10004 / 0 and finally writes the
   * status/command word with the Mem and Master bits set. The base written
   * here is the one rdmem/wrmem OR their offsets into.
   * NOTE(review): the all-ones writes are presumably the usual BAR sizing
   * probe, but the values read back are not used - confirm.
   */
  public static void configTarget () {
    wrcfg(0x04,0xFFFF0000);
    wrcfg(0x10,0xFFFFFFFF);
    wrcfg(0x14,0xFFFFFFFF);
    wrcfg(0x18,0xFFFFFFFF);
    wrcfg(0x1C,0xFFFFFFFF);
    wrcfg(0x10,0x1ce10004);	// Base Addr 0 lower
    wrcfg(0x14,0x00000000);	// Base Addr 0 upper
    wrcfg(0x04,0xF0000006);	// Status Mem|Master
  }

  /**
   * Resets the driver's queue state and enables the NVMe controller.
   *
   * In order: clears the software head/tail pointers and request counter,
   * writes the Controller Configuration register (0x14) with the enable bit
   * clear, fills the 32-entry page list at BPGL, zeroes the scratch area and
   * all four queues, writes all-ones to both interrupt mask registers
   * (0x0C and 0x10), programs the admin queue sizes and base addresses,
   * writes the (zero) pointers to all four doorbells, and finally sets the
   * enable bit. It does not wait for the controller to report ready.
   */
  public static void enableController() {
    cqhead0=0; cqhead1=0;
    sqtail0=0; sqtail1=0;
    requests=0;
    wrmem(0x0014,0x00460000);		// ensure disable
    maddr = BPGL;			// create PGP List
    // each entry is two words: page i at a 4KB stride, then 0x2000 (same value as XMEM)
    for (int i=1; i<=32; i++) {
      mdata = (i<<12);
      mdata = 0x2000;
    }
    // zeroMem sizes are counts of mdata writes, not bytes
    zeroMem(BSCR,0x40);
    zeroMem(BSQ0,SQS0<<4);
    zeroMem(BCQ0,CQS0<<2);
    zeroMem(BSQ1,SQS1<<4);
    zeroMem(BCQ1,CQS1<<2);
    wrmem(0x000C,0xFFFFFFFF);
    wrmem(0x0010,0xFFFFFFFF);
    wrmem(0x0024,0x000F0007);		// Admin Queue Sizes 16 Cpl & 8 Sub
    wrmemx(0x0028,0xA00,0x00000000);	// Admin Sub Base Addr	(special BRAM map)
    wrmemx(0x0030,0x800,0x00000000);	// Admin Cpl Base Addr	(special BRAM map)
    wrmem(0x1000,sqtail0);		// Admin Tail Doorbell
    wrmem(0x1004,cqhead0);		// Admin Head Doorbell
    wrmem(0x1008,sqtail1);		// IO Tail Doorbell
    wrmem(0x100C,cqhead1);		// IO Head Doorbell
    wrmem(0x0014,0x00460001);		// Enable Controller bit
  }

  /**
   * Sets up the single IO queue pair with three admin commands, each of
   * which submitQueue0 waits on (with a timeout) before the next is sent:
   * a Set Feature for feature 0x07 with value 0, then Create CQ and
   * Create SQ for queue 1 located at BCQ1 / BSQ1.
   * NOTE(review): per the NVMe spec feature 0x07 is Number of Queues, and
   * 0x000F0001 encodes size 16 (zero-based 15) with queue ID 1 - confirm.
   * Command status is not checked.
   */
  public static void configController() {
    submitQueue0(OPCODE_SET_FEATURE,0,0,0x07,0x00000000);
    submitQueue0(OPCODE_CREATE_CQ,0,BCQ1&0xFFFF,0x000F0001,0x00000001);
    submitQueue0(OPCODE_CREATE_SQ,0,BSQ1&0xFFFF,0x000F0001,0x00010001);
  }

  /**
   * Tears down the IO queue pair and disables the controller.
   *
   * The submission queue is deleted before the completion queue it posts
   * to: NVMe requires every associated SQ to be gone before a CQ is
   * deleted, otherwise the Delete CQ command is rejected with Invalid Queue
   * Deletion. Finally the Controller Configuration register (0x14) is
   * written with the enable bit clear.
   * Command status is not checked, and there is no wait for the controller
   * to report not-ready.
   */
  public static void disableController() {
    submitQueue0(OPCODE_DELETE_SQ,0,BSQ1&0xFFFF,0x01,0x0);	// IO SQ 1 first
    submitQueue0(OPCODE_DELETE_CQ,0,BCQ1&0xFFFF,0x01,0x0);	// then IO CQ 1
    wrmem(0x0014,0x00460000);
  }

  /**
   * Clears a region of memory by writing zero through the mdata port
   * size times, starting at addr.
   *
   * @param addr starting address loaded into maddr
   * @param size number of zero writes to issue (a count of writes, not of
   *             bytes); values <= 0 write nothing
   */
  public static void zeroMem (int addr, int size) {
    maddr = addr;
    int left = size;
    while (left>0) {
      mdata = 0;
      left--;
    }
  }

  /**
   * Builds one admin command in the next admin submission queue slot, rings
   * the doorbell and waits for a completion.
   *
   * The slot is sqtail0 modulo 8 and entries are 64 bytes apart. Only
   * dwords 0-1 and 6-12 of the entry are written; dwords 2-5 keep whatever
   * the slot held before. The command ID in the top half of dword 0 is
   * 0x1ce0 OR'ed with the slot index.
   *
   * After ringing the doorbell the admin completion queue is polled up to
   * 1000 times, with udelay(1000) before each poll, until its head moves.
   * A timeout is not reported and the completion status is not examined.
   *
   * @param opcode admin opcode (low bits of dword 0)
   * @param nsid   namespace ID (dword 1)
   * @param uaddr  upper dword of the data pointer; the lower dword is 0
   * @param dw10   command dword 10
   * @param dw11   command dword 11
   */
  public static void submitQueue0 (int opcode, int nsid, int uaddr, int dw10, int dw11) {
    int index = sqtail0 & 0x7;
    int qaddr = BSQ0 + (index<<6);
    maddr = qaddr;			// DW0 Command OpCode
    mdata = ((0x1ce0|index)<<16) | opcode;
    mdata = nsid;			// DW1 NSID
    maddr = qaddr+24;			// DW6 Data Buffer Address
    mdata = 0;				// PRP address
    mdata = uaddr;
    mdata = 0;				// DW8
    mdata = 0;
    mdata = dw10;			// DW10 word
    mdata = dw11;			// DW11 word
    mdata = 0;				// DW12 word
    sqtail0++;
    index = sqtail0 & 0x7;
    wrmem(0x1000,index);		// Admin Tail Doorbell
    // wait (bounded) for handleCompletions0 to advance the head
    int cur = cqhead0;
    for (int i=0; i<1000 && cqhead0==cur; i++) {
      udelay(1000);
      handleCompletions0();
    }
  }

  /**
   * Builds one IO command in the next IO submission queue slot and rings the
   * doorbell. Does not wait for completion.
   *
   * The slot is sqtail1 modulo 16 and entries are 64 bytes apart. Dwords 0-1
   * and 6-13 are written; dwords 2-5 keep whatever the slot held before. The
   * namespace is always 1.
   *
   * Dwords 8-9 depend on how the data is described:
   *   - sgl != 0:     length in bytes (blocks * 512) and type 0;
   *   - blocks <= 8:  zero (the first pointer alone is used);
   *   - blocks <= 16: a second pointer, 0x1000 past the first;
   *   - otherwise:    the page list at 0x0700 (the low bits of BPGL).
   *
   * @param opcode IO opcode
   * @param sgl    0, or OPCODE_USESGL to OR into dword 0 and select the SGL form
   * @param uaddr  upper dword of the data pointer; the lower dword is 0
   * @param ublock upper 32 bits of the starting block (dword 11)
   * @param block  lower 32 bits of the starting block (dword 10)
   * @param blocks number of blocks; written zero-based (blocks-1) to dword 12
   */
  public static void submitQueue1 (int opcode, int sgl, int uaddr, int ublock, int block, int blocks) {
    int index = sqtail1 & 0xF;
    int qaddr = BSQ1 + (index<<6);
    maddr = qaddr;			// DW0 Command OpCode
    mdata = ((0x1ce0|index)<<16) | sgl | opcode;
    mdata = 0x1;			// DW1 NSID
    maddr = qaddr+24;			// DW6 Offset
    mdata = 0;				// DW6 Data Buffer1 or SGL Address 
    mdata = uaddr;
    if (sgl!=0) {
      mdata = blocks<<9;		// DW8 Data Buffer SGL Size
      mdata = 0x00000000;		// DW9 Data Buffer SGL Type
    }
    else if (blocks<=8) {
      mdata = 0x0;			// DW8 Data Buffer2 Address
      mdata = 0x0;
    }
    else if (blocks<=16) {
      mdata = 0x1000;			// DW8 Data Buffer2 Address
      mdata = uaddr;
    }
    else {
      mdata = 0x0;			// DW8 PGP List Address
      mdata = 0x0700;
    }
    mdata = block;			// DW10 word or MGM NR num ranges
    mdata = ublock;			// DW11 word or MGM type 1=read 2=write 4=release
    mdata = blocks-1;			// DW12 word
    mdata = 0x52;			// DW13 sequential req, Idle, infrequent RW
    sqtail1++;
    index = sqtail1 & 0xF;
    wrmem(0x1008,index);		// IO Tail Doorbell
  }

  /**
   * Copies memory one 32-bit word at a time, lowest address first.
   *
   * @param addri source address
   * @param addro destination address
   * @param bytes number of bytes to copy; rounded down to a multiple of 4,
   *              and nothing is copied when the result is <= 0
   */
  public static void copyMem (int addri, int addro, int bytes) {
    int words = bytes>>2;
    int off = 0;
    while (words>0) {
      awMem(addro+off,arMem(addri+off));
      off += 4;
      words--;
    }
  }

  /**
   * Advances a completion queue head past every newly posted entry.
   *
   * head is a 5-bit value: the low four bits select one of 16 entries
   * (16 bytes each) and bit 4 is compared against the entry's phase flag,
   * bit 16 of the entry's last dword. An entry counts as new while that
   * flag differs from bit 4 of head; scanning stops at the first entry
   * where they match, or after 32 entries.
   *
   * @param head  current head value
   * @param qaddr base address of the completion queue
   * @return the updated head (equal to the input when nothing is new)
   */
  public static int handleCompletions (int head, int qaddr) {
    int scanned = 0;
    while (scanned<32) {
      int dw3 = arMem(qaddr + ((head&0xF)<<4) + 12);
      boolean stale = ((dw3>>16)&0x1) == (head>>4);	// check phase bit
      if (stale) return head;
      head = (head+1)&0x1F;
      scanned++;
    }
    return head;
  }

  /**
   * Polls the admin completion queue. If new entries were found, records the
   * new head in cqhead0 and writes its slot index (low four bits) to the
   * admin head doorbell at 0x1004. Does nothing when the head is unchanged.
   */
  public static void handleCompletions0 () {
    int head = handleCompletions(cqhead0,BCQ0);
    if (head!=cqhead0) {
      cqhead0 = head;
      wrmem(0x1004,head&0xF);		// Admin Head Doorbell
    }
  }

  /**
   * Polls the IO completion queue. If new entries were found, records the
   * new head in cqhead1 and writes its slot index (low four bits) to the
   * IO head doorbell at 0x100C. Does nothing when the head is unchanged.
   */
  public static void handleCompletions1 () {
    int head = handleCompletions(cqhead1,BCQ1);
    if (head!=cqhead1) {
      cqhead1 = head;
      wrmem(0x100C,head&0xF);		// IO Head Doorbell
    }
  }

  /**
   * Walks the target device's capability list and dumps it to local memory.
   *
   * Starts from the pointer at config offset 0x34 and follows up to 8 links.
   * Each capability header is stored at 0x0800, 0x0804, ...; once the
   * pointer becomes 0 the remaining slots are written as 0. When a header
   * with ID 0x10 (PCIe capabilities) is found, the dword at its offset +8 is
   * read, bit 5 is OR'ed in, and the result is written back
   * (set wMPL=256 rMPL=512).
   * NOTE(review): the write-back returns the whole dword that was read,
   * including its upper half - confirm that is harmless for this device.
   */
  public static void capabilities () {
    int ptr = rdcfg(0x34)&0xFF;
    for (int slot=0; slot<8; slot++) {
      int hdr = (ptr>0) ? rdcfg(ptr) : 0;
      if ((hdr&0xFF)==0x10) {		// PCIe capabilities
        int ctl = ptr+8;
        wrcfg(ctl,rdcfg(ctl)|0x20);	// set wMPL=256 rMPL=512
      }
      awMem(0x0800+(slot<<2),hdr);
      ptr = (hdr>>8)&0xFF;		// next capability pointer
    }
  }

}
