---------------------------------------------------------------------------------------------------
--! @brief  Csl Dma Write and Read module testing
--! @details  
--!    The test case measures the maximum data throughput. The testbench generates 8 AXI Stream
--!    channels, each with a 32-bit wide data vector, running in parallel. Full data bandwidth
--!    throughput is obtained when the clock period of the DMA part of the system is 6.5 ns and
--!    the clock period of the data acquisition part of the system is 10 ns.
--!
--! @author
--!
--! @date January 12 2018 created
--!
--! @version 
--!
--!
--! @file CslDmaThroughputTb.vhd
---------------------------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use work.CslStdRtlPkg.all;
use work.CslAxiPkg.all;
use work.CslDmaTypePkg.all;
--! @brief Testbench entity for the Csl Dma write/read throughput test
--! @details The entity declares no generics and no ports; all stimulus, clocks and checks are
--!          produced inside the architecture.
--! @author
---------------------------------------------------------------------------------------------------
entity CslDmaThroughputTb is
end CslDmaThroughputTb;
---------------------------------------------------------------------------------------------------
architecture behavior of CslDmaThroughputTb is

   -- Clocks and reset generated by the testbench
   signal clk_i     : sl := '0'; --! slow clock (period T_C), stimulus side of the stream FIFOs
   signal rst_i     : sl := '0'; --! reset driven by p_RstGen; inverted to form axi_resetn_0
   signal clkFast_i : sl := '0'; --! fast clock (period T_FAST_C), DMA / interconnect side

   -- constants for Dma
   constant TPD_G         : time     := 1 ns; --! passed to the TPD_G generics; also used as stimulus offset
   constant AWIDTH_G      : positive := 8;  --! Address width
   constant DWIDTH_G      : positive := 32; --! Data width
   constant PIPELINE_G    : natural  := 0;  --! delay in clock cycles of output data
   constant WRITE_FIRST_G : boolean  := false; --! forwarded to the WRITE_FIRST_G generic of the DMA modules
   constant BRAM_G        : boolean  := false; --! false => LUT RAM, true => BRAM

   --! AXI4 bus between the Axi Interconnect wrapper (master side) and the block design
   signal axiWriteMaster_o : AxiWriteMasterType;
   signal axiWriteSlave_i  : AxiWriteSlaveType;
   signal axiReadMaster_o  : AxiReadMasterType;
   signal axiReadSlave_i   : AxiReadSlaveType;

   --! Axi Stream channels on the clk_i side of the Axi Stream asynchronous FIFOs
   --! (write stimulus into the write FIFOs, read data out of the read FIFOs)
   signal axisWriteMaster_i : AxiStreamMasterArray(0 to 7) := (others => AXI_STREAM_MASTER_INIT_C);
   signal axisWriteSlave_o  : AxiStreamSlaveArray(0 to 7)  := (others => AXI_STREAM_SLAVE_INIT_C);
   signal axisReadMaster_o  : AxiStreamMasterArray(0 to 7) := (others => AXI_STREAM_MASTER_INIT_C);
   signal axisReadSlave_i   : AxiStreamSlaveArray(0 to 7)  := (others => AXI_STREAM_SLAVE_INIT_C);

   --! Axi Stream channels on the clkFast_i side, between the FIFOs and the DMA modules
   signal mAxisWriteMaster : AxiStreamMasterArray(0 to 7) := (others => AXI_STREAM_MASTER_INIT_C);
   signal mAxisWriteSlave  : AxiStreamSlaveArray(0 to 7)  := (others => AXI_STREAM_SLAVE_INIT_C);
   signal mAxisReadMaster  : AxiStreamMasterArray(0 to 7) := (others => AXI_STREAM_MASTER_INIT_C);
   signal mAxisReadSlave   : AxiStreamSlaveArray(0 to 7)  := (others => AXI_STREAM_SLAVE_INIT_C);
   signal axisReadPause    : slv(7 downto 0); --! pause_o of the read FIFOs (observation only)
   signal axisWritePause   : slv(7 downto 0); --! pause_o of the write FIFOs (observation only)

   --! Dma Control signals for Write and Read operations, one record per channel
   type DmaCtrlArray is array(0 to 7) of DmaCtrlType;
   type DmaStatArray is array(0 to 7) of DmaStatType;
   signal dmaWriteCtrl_i : DmaCtrlArray := (others => DMA_CTRL_INIT_C);
   signal dmaWriteStat_o : DmaStatArray;

   signal dmaReadCtrl_i : DmaCtrlArray := (others => DMA_CTRL_INIT_C);
   signal dmaReadStat_o : DmaStatArray;
   signal dmaReadData   : Slv32Array(7 downto 0); --! bits 31..0 of each read stream tData
   signal RdChData      : Slv32Array(7 downto 0); --! value the read checker expected last

   -- test block design signals
   signal axi_resetn_0 : sl; --! not rst_i
   signal rsta_busy_0  : sl; --! connected to the block design port rsta_busy_0, not evaluated here
   signal s_axi_aclk_0 : sl; --! copy of clkFast_i

   --! WORD_DMA_WR_C is programmed as the DMA write threshold;
   --! NUM_OF_DATA_C is programmed as the DMA size and bounds the stimulus / check loops
   constant WORD_DMA_WR_C : integer := 16;
   constant NUM_OF_DATA_C : integer := WORD_DMA_WR_C*60;

   --! Axi Interconnect Wrapper signals (slave side, one AXI4 port per DMA channel)
   signal sAxiWriteMasters : AxiWriteMasterArray(7 downto 0);
   signal sAxiWriteSlaves  : AxiWriteSlaveArray(7 downto 0);
   signal sAxiReadMasters  : AxiReadMasterArray(7 downto 0);
   signal sAxiReadSlaves   : AxiReadSlaveArray(7 downto 0);

   -- Clock period definitions
   constant T_C      : time := 10.0 ns; --! Clock period of clk_i
   constant T_FAST_C : time := 6.5 ns;  --! Clock period of clkFast_i
   ------------------------------------------------------------------------------------------------
begin

   -- Clock and reset of the block design: it is clocked by the fast clock and receives the
   -- inverted testbench reset on axi_resetn_0.
   s_axi_aclk_0 <= clkFast_i;
   axi_resetn_0 <= not rst_i;

   -- Axi4Throughput block design. Its S_AXI_0 port is wired to the master side of the
   -- Axi Interconnect wrapper. Associations are grouped per AXI4 channel.
   u_Axi4Throughput : entity work.Axi4Throughput
      port map (
         -- clock, reset, status
         s_axi_aclk_0     => s_axi_aclk_0,
         axi_resetn_0     => axi_resetn_0,
         rsta_busy_0      => rsta_busy_0,
         -- write address channel (AW)
         S_AXI_0_awvalid  => axiWriteMaster_o.awvalid,
         S_AXI_0_awready  => axiWriteSlave_i.awready,
         S_AXI_0_awaddr   => axiWriteMaster_o.awaddr(31 downto 0),
         S_AXI_0_awid     => axiWriteMaster_o.awid(3 downto 0),
         S_AXI_0_awlen    => axiWriteMaster_o.awlen,
         S_AXI_0_awsize   => axiWriteMaster_o.awsize,
         S_AXI_0_awburst  => axiWriteMaster_o.awburst,
         S_AXI_0_awlock   => axiWriteMaster_o.awlock(0 downto 0),
         S_AXI_0_awcache  => axiWriteMaster_o.awcache,
         S_AXI_0_awprot   => axiWriteMaster_o.awprot,
         S_AXI_0_awqos    => axiWriteMaster_o.awqos,
         S_AXI_0_awregion => axiWriteMaster_o.awregion,
         -- write data channel (W)
         S_AXI_0_wvalid   => axiWriteMaster_o.wvalid,
         S_AXI_0_wready   => axiWriteSlave_i.wready,
         S_AXI_0_wdata    => axiWriteMaster_o.wdata(511 downto 0),
         S_AXI_0_wstrb    => axiWriteMaster_o.wstrb(63 downto 0),
         S_AXI_0_wlast    => axiWriteMaster_o.wlast,
         -- write response channel (B)
         S_AXI_0_bvalid   => axiWriteSlave_i.bvalid,
         S_AXI_0_bready   => axiWriteMaster_o.bready,
         S_AXI_0_bid      => axiWriteSlave_i.bid(3 downto 0),
         S_AXI_0_bresp    => axiWriteSlave_i.bresp,
         -- read address channel (AR)
         S_AXI_0_arvalid  => axiReadMaster_o.arvalid,
         S_AXI_0_arready  => axiReadSlave_i.arready,
         S_AXI_0_araddr   => axiReadMaster_o.araddr(31 downto 0),
         S_AXI_0_arid     => axiReadMaster_o.arid(3 downto 0),
         S_AXI_0_arlen    => axiReadMaster_o.arlen,
         S_AXI_0_arsize   => axiReadMaster_o.arsize,
         S_AXI_0_arburst  => axiReadMaster_o.arburst,
         S_AXI_0_arlock   => axiReadMaster_o.arlock(0 downto 0),
         S_AXI_0_arcache  => axiReadMaster_o.arcache,
         S_AXI_0_arprot   => axiReadMaster_o.arprot,
         S_AXI_0_arqos    => axiReadMaster_o.arqos,
         S_AXI_0_arregion => axiReadMaster_o.arregion,
         -- read data channel (R)
         S_AXI_0_rvalid   => axiReadSlave_i.rvalid,
         S_AXI_0_rready   => axiReadMaster_o.rready,
         S_AXI_0_rdata    => axiReadSlave_i.rdata(511 downto 0),
         S_AXI_0_rid      => axiReadSlave_i.rid(3 downto 0),
         S_AXI_0_rresp    => axiReadSlave_i.rresp,
         S_AXI_0_rlast    => axiReadSlave_i.rlast
      );

   -----------------------------------------------------------------------------
   -- Axi Interconnect Wrapper
   -----------------------------------------------------------------------------

   -- AxiIntWrapTest: joins the eight per-channel AXI4 ports of the DMA modules (slave side)
   -- onto the single AXI4 port that is connected to the block design (master side).
   u_AxiIntWrap : entity work.AxiIntWrapTest
      generic map (
         TPD_G => TPD_G
      )
      port map (
         -- clocking
         clk_i              => clkFast_i,        --! clock input
         rst_i              => rst_i,            --! reset input
         -- master side, towards the block design
         mAxiWriteMaster_o  => axiWriteMaster_o, --! master axi4 write output signals
         mAxiWriteSlave_i   => axiWriteSlave_i,  --! master axi4 write input signals
         mAxiReadMaster_o   => axiReadMaster_o,  --! master axi4 read output signals
         mAxiReadSlave_i    => axiReadSlave_i,   --! master axi4 read input signals
         -- slave side, one port per DMA channel
         sAxiWriteMasters_i => sAxiWriteMasters, --! slaves axi4 write input signals
         sAxiWriteSlaves_o  => sAxiWriteSlaves,  --! slaves axi4 write output signals
         sAxiReadMasters_i  => sAxiReadMasters,  --! slaves axi4 read input signals
         sAxiReadSlaves_o   => sAxiReadSlaves    --! slaves axi4 read output signals
      );

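   --! Generate the eight test channels: each index i gets a write FIFO, a DMA write module,
   --! a DMA read module, a read FIFO and its own stimulus and checker processes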
   gen_CslDma : for i in 0 to 7 generate
      -- Write-path CslAxisFifo: accepts the testbench stimulus of channel i on clk_i and
      -- presents it to the DMA write module on clkFast_i.
      uut_CslAxisFifo : entity work.CslAxisFifo
         generic map (
            TPD_G          => TPD_G,
            -- storage
            BRAM_G         => false,
            AWIDTH_G       => 8,
            THRESHOLD_G    => 128,
            PIPELINE_G     => 1,
            -- stream layout
            BYTES_SLV_G    => 4,
            BYTES_MST_G    => 4,
            TID_WIDTH_G    => 1,
            TDEST_WIDTH_G  => 1,
            TUSER_WIDTH_G  => 1,
            TUSER_NORMAL_G => false
         )
         port map (
            -- slave (input) side, slow clock
            sAxisClk_i    => clk_i,
            sAxisRst_i    => rst_i,
            sAxisMaster_i => axisWriteMaster_i(i),
            sAxisSlave_o  => axisWriteSlave_o(i),
            -- master (output) side, fast clock
            mAxisClk_i    => clkFast_i,
            mAxisRst_i    => rst_i,
            mAxisMaster_o => mAxisWriteMaster(i),
            mAxisSlave_i  => mAxisWriteSlave(i),
            -- status
            pause_o       => axisWritePause(i)
         );

      -- CslDmaWrite of channel i: takes the stream coming out of the write-path FIFO and
      -- issues AXI4 writes on port i of the interconnect wrapper, under control of
      -- dmaWriteCtrl_i(i).
      uut_CslDmaWrite : entity work.CslDmaWrite
         generic map (
            TPD_G         => TPD_G,
            DWIDTH_G      => DWIDTH_G,
            AWIDTH_G      => AWIDTH_G,
            WRITE_FIRST_G => WRITE_FIRST_G
         )
         port map (
            clk_i             => clkFast_i,
            rst_i             => rst_i,
            -- control / status
            dmaWriteCtrl_i    => dmaWriteCtrl_i(i),
            dmaWriteStat_o    => dmaWriteStat_o(i),
            -- stream input
            axisWriteMaster_i => mAxisWriteMaster(i),
            axisWriteSlave_o  => mAxisWriteSlave(i),
            -- AXI4 write port
            axiWriteMaster_o  => sAxiWriteMasters(i),
            axiWriteSlave_i   => sAxiWriteSlaves(i)
         );

      -- Read-path CslAxisFifo: accepts the stream produced by the DMA read module on
      -- clkFast_i and presents it to the testbench checker of channel i on clk_i.
      uut1_CslAxisFifo : entity work.CslAxisFifo
         generic map (
            TPD_G          => TPD_G,
            -- storage
            BRAM_G         => false,
            AWIDTH_G       => 8,
            THRESHOLD_G    => 128,
            PIPELINE_G     => 1,
            -- stream layout
            BYTES_SLV_G    => 4,
            BYTES_MST_G    => 4,
            TID_WIDTH_G    => 1,
            TDEST_WIDTH_G  => 1,
            TUSER_WIDTH_G  => 1,
            TUSER_NORMAL_G => false
         )
         port map (
            -- slave (input) side, fast clock
            sAxisClk_i    => clkFast_i,
            sAxisRst_i    => rst_i,
            sAxisMaster_i => mAxisReadMaster(i),
            sAxisSlave_o  => mAxisReadSlave(i),
            -- master (output) side, slow clock
            mAxisClk_i    => clk_i,
            mAxisRst_i    => rst_i,
            mAxisMaster_o => axisReadMaster_o(i),
            mAxisSlave_i  => axisReadSlave_i(i),
            -- status
            pause_o       => axisReadPause(i)
         );

      -- CslDmaRead of channel i: issues AXI4 reads on port i of the interconnect wrapper,
      -- under control of dmaReadCtrl_i(i), and feeds the resulting stream into the
      -- read-path FIFO.
      uut_CslDmaRead : entity work.CslDmaRead
         generic map (
            TPD_G         => TPD_G,
            AWIDTH_G      => AWIDTH_G,
            DWIDTH_G      => DWIDTH_G,
            -- use the architecture constant (currently 0) instead of a repeated literal so
            -- that all DMA generics are configured from one place
            PIPELINE_G    => PIPELINE_G,
            WRITE_FIRST_G => WRITE_FIRST_G,
            BRAM_G        => BRAM_G)
         port map (
            clk_i            => clkFast_i,
            rst_i            => rst_i,
            axisReadMaster_o => mAxisReadMaster(i),
            axisReadSlave_i  => mAxisReadSlave(i),
            dmaReadCtrl_i    => dmaReadCtrl_i(i),
            dmaReadStat_o    => dmaReadStat_o(i),
            axiReadMaster_o  => sAxiReadMasters(i),
            axiReadSlave_i   => sAxiReadSlaves(i)
         );

      -- Expose the low 32 data bits of the read stream of channel i for waveform inspection
      dmaReadData(i) <= axisReadMaster_o(i).tData(31 downto 0);

      --! @brief Write-channel stimulus process
      --! @details Programs the DMA write control record of channel i and then streams the
      --!          values 0 to NUM_OF_DATA_C into the write-path FIFO, one word per T_C period,
      --!          with tValid held high for the whole burst.
      --! @param[in]  clk_i, NUM_OF_DATA_C, WORD_DMA_WR_C, T_C, TPD_G
      --! @param[out] dmaWriteCtrl_i, axisWriteMaster_i
      p_Sim_Ch : process
      begin

         -- DMA write configuration: enabled from time zero, non-circular. The channel index
         -- is placed in address bits 15..12, so the start addresses are 0x1000 apart.
         dmaWriteCtrl_i(i).en        <= '1';
         dmaWriteCtrl_i(i).threshold <= slv(to_unsigned(WORD_DMA_WR_C,32));
         dmaWriteCtrl_i(i).startAddr <= x"0000"&slv(to_unsigned(i,4))&x"000";
         dmaWriteCtrl_i(i).size      <= slv(to_unsigned(NUM_OF_DATA_C,32));
         dmaWriteCtrl_i(i).circular  <= '0';
         axisWriteMaster_i(i).tValid <= '0';

         -- Align to a clk_i rising edge and change the stimulus TPD_G after it
         wait until clk_i = '1';

         wait for TPD_G;

         -- Start delay: 100 periods plus 10 more per channel index, so channels start staggered
         wait for T_C * 100 + T_C * 10 * i;

         -- NOTE(review): axisWriteSlave_o(i).tReady is not observed, so the burst assumes the
         -- FIFO never applies back-pressure - confirm this holds for the configured depth.
         -- NOTE(review): the range 0 to NUM_OF_DATA_C yields NUM_OF_DATA_C + 1 words while the
         -- programmed size is NUM_OF_DATA_C - confirm the extra word is intended.
         axisWriteMaster_i(i).tValid <= '1';
         for j in 0 to NUM_OF_DATA_C loop
            axisWriteMaster_i(i).tData(31 downto 0) <= slv(to_unsigned(j, 32));
            wait for T_C;
         end loop;
         axisWriteMaster_i(i).tValid <= '0';

         -- Stimulus is sent once; the process then stays suspended
         wait;
      end process;

      --! @brief Read-channel checker process
      --! @details Programs the DMA read control record of channel i, enables the read DMA after
      --!          a channel-dependent delay, then asserts tReady and compares every valid output
      --!          word with an incrementing counter that starts at 0. A mismatch is reported
      --!          with severity failure; channel 7 also ends the run with severity failure.
      --! @param[in]  axisReadMaster_o, NUM_OF_DATA_C, T_C, TPD_G, WORD_DMA_WR_C
      --! @param[out] dmaReadCtrl_i, axisReadSlave_i, RdChData
      p_Sim_RdCh : process
         variable expWord : integer := 1; --! next expected data word
      begin
         -- Read DMA configuration, engine still disabled and output stream not accepted
         axisReadSlave_i(i).tReady  <= '0';
         dmaReadCtrl_i(i).en        <= '0';
         dmaReadCtrl_i(i).circular  <= '0';
         dmaReadCtrl_i(i).size      <= slv(to_unsigned(NUM_OF_DATA_C,32));
         dmaReadCtrl_i(i).startAddr <= x"0000"&slv(to_unsigned(i,4))&x"000";
         dmaReadCtrl_i(i).threshold <= slv(to_unsigned(16,32));

         -- Enable the read channel, 100 periods later for every higher channel index
         wait for T_C * 2000 +T_C * 100*i;
         dmaReadCtrl_i(i).en <= '1';
         expWord             := 0;

         -- Start accepting data 100 periods (plus TPD_G) after the first valid word shows up
         wait until axisReadMaster_o(i).tValid = '1';
         wait for T_C * 100 + TPD_G;
         axisReadSlave_i(i).tReady <= '1';

         for wordIdx in 0 to NUM_OF_DATA_C - WORD_DMA_WR_C loop
            -- If no word is presented right now, block until one is and sample TPD_G later
            if (axisReadMaster_o(i).tValid /= '1') then
               wait until axisReadMaster_o(i).tValid = '1';
               wait for TPD_G;
            end if;

            -- Compare against the expected counter value; stop one period after a mismatch
            if (axisReadMaster_o(i).tData(31 downto 0) /= slv(to_unsigned(expWord, 32))) then
               wait for T_C;
               assert false report "SIMULATION FAILED" severity failure;
            end if;

            -- Publish the checked value, advance the counter and move to the next period
            RdChData(i) <= slv(to_unsigned(expWord,32));
            expWord     := expWord + 1;
            wait for T_C * 1;
         end loop;

         wait for T_C * 10;
         -- NOTE(review): tReady is already '1' here, so this assignment has no visible effect;
         -- confirm whether '0' was intended.
         axisReadSlave_i(i).tReady <= '1';

         -- Only the last channel terminates the run; the others stay suspended
         if i /= 7 then
            wait;
         else
            wait for T_C * 100;
            ---- Stop simulation
            assert false report "SIMULATION COMPLEATED" severity failure;
         end if;
      end process;
   end generate gen_CslDma;



   --! @brief Slow clock generator
   --! @details Free-running square wave on clk_i: low for T_C/2, then high for T_C/2.
   --! @param[in]  T_C
   --! @param[out] clk_i
   p_SyncClkGen : process
      constant HALF_PERIOD_C : time := T_C/2;
   begin
      loop
         clk_i <= '0';
         wait for HALF_PERIOD_C;
         clk_i <= '1';
         wait for HALF_PERIOD_C;
      end loop;
   end process p_SyncClkGen;

   --! @brief Fast clock generator
   --! @details Free-running square wave on clkFast_i: low for T_FAST_C/2, then high for
   --!          T_FAST_C/2.
   --! @param[in]  T_FAST_C
   --! @param[out] clkFast_i
   p_SyncClkFastGen : process
      constant HALF_PERIOD_C : time := T_FAST_C/2;
   begin
      loop
         clkFast_i <= '0';
         wait for HALF_PERIOD_C;
         clkFast_i <= '1';
         wait for HALF_PERIOD_C;
      end loop;
   end process p_SyncClkFastGen;

   --! @brief Reset process
   --! @details Reset pulse generator: rst_i is low for the first 10 T_C periods, high for the
   --!          next 10 periods and low from then on.
   --! @param[in]  T_C
   --! @param[out] rst_i
   p_RstGen : process
   begin
      -- Whole reset waveform scheduled at once: low, pulse high at 10*T_C, release at 20*T_C
      rst_i <= '0',
               '1' after T_C * 10,
               '0' after T_C * 20;
      -- Nothing else to drive; stay suspended
      wait;
   end process p_RstGen;

end;
---------------------------------------------------------------------------------------------------
