---------------------------------------------------------------------------------------------------
--! @brief  Dual Port RAM module
--! @details The RAM is configured through the module generics. The address width is set by the
--! ADR_W_G generic and the data width by the DAT_W_G generic. The delay of the output data, in
--! clock cycles, is set by the PIPELINE_G generic. Write-first mode is selected when the
--! WRITE_FIRST_G generic is TRUE; when WRITE_FIRST_G is FALSE, read-first mode is selected.
--! The type of RAM is selected by the boolean generic BRAM_G: LUT RAM (BRAM_G => false) or
--! block RAM (BRAM_G => true).
--! !NOTE! Connection between generic PIPELINE_G and type of RAM:
--! -PIPELINE_G = 0 :   - the output has no clock delay (asynchronous output),
--!                     - it can only be implemented in LUT RAM,
--!                       because the block RAM primitive cannot be asynchronous
--! -PIPELINE_G = N :   - the output has N clock cycles of delay (synchronous output),
--!                     - both types of RAM can be implemented,
--!                     - LUT RAM => N output registers
--!                     - block RAM => N-1 output registers
--!                       (one clock cycle of delay is used by the BRAM primitive)
--!
--! https://www.xilinx.com/itp/xilinx10/books/docs/xst/xst.pdf
--!
--! @author Jernej Kokalj, Cosylab (jernej.kokalj@cosylab.com)
--!
--! @date January 22 2018 created
--! @date March 29 2018 last modify
--!
--! @version v1.0
--!
--! @par Modifications:
--! jkokalj, January 22 2018: Created
--! jkokalj, March 28 2018: design iteration
--!
--! @file CslDpRAM.vhd
---------------------------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use work.CslStdRtlPkg.all;

--! @brief  Dual port RAM: port A writes and reads, port B only reads
--! @details All configuration is done through generics:
--! - ADR_W_G sets the address width and DAT_W_G sets the data width.
--! - PIPELINE_G sets the delay of the output data in clock cycles.
--! - WRITE_FIRST_G = true selects write-first mode, false selects read-first mode.
--! - BRAM_G = true requests block RAM, false requests LUT (distributed) RAM.
--!
--! Relation between PIPELINE_G and the RAM type:
--! - PIPELINE_G = 0 : the outputs are asynchronous (no clock delay). This can only be
--!                    implemented in LUT RAM, because the block RAM primitive cannot
--!                    be read asynchronously.
--! - PIPELINE_G = N : the outputs are delayed by N clock cycles. Both RAM types can be
--!                    used: LUT RAM gets N output registers, block RAM gets N-1 output
--!                    registers (one clock cycle is spent inside the BRAM primitive).
--!
--! The resets act on the read outputs only; no port clears the stored RAM contents.
--! @author Jernej Kokalj, Cosylab (jernej.kokalj@cosylab.com)
---------------------------------------------------------------------------------------------------
entity CslDpRAM is
   generic (
      TPD_G         : time    := 1 ns;   --! delay applied to registered assignments (after TPD_G)
      ADR_W_G       : integer := 4;      --! address width in bits (RAM depth is 2**ADR_W_G words)
      DAT_W_G       : integer := 8;      --! data width in bits
      PIPELINE_G    : natural := 2;      --! output data delay in clock cycles, 0 => asynchronous
      WRITE_FIRST_G : boolean := false;  --! true => write-first mode, false => read-first mode
      BRAM_G        : boolean := false   --! true => block RAM, false => LUT RAM
   );
   port (
      -- Port A: write and read
      clka  : in  sl;                          --! port A clock
      rsta  : in  sl;                          --! port A read-output reset, active high
      wea   : in  sl;                          --! port A write enable, active high
      addra : in  slv(ADR_W_G - 1 downto 0);   --! port A address (used for write and read)
      dina  : in  slv(DAT_W_G - 1 downto 0);   --! port A write data
      douta : out slv(DAT_W_G - 1 downto 0);   --! port A read data

      -- Port B: read only
      clkb  : in  sl;                          --! port B clock
      rstb  : in  sl;                          --! port B read-output reset, active high
      addrb : in  slv(ADR_W_G - 1 downto 0);   --! port B read address
      doutb : out slv(DAT_W_G - 1 downto 0)    --! port B read data
   );
end entity CslDpRAM;
---------------------------------------------------------------------------------------------------
architecture rtl of CslDpRAM is

   --! Memory array: 2**ADR_W_G words, each DAT_W_G bits wide
   type t_ram is array (0 to 2**ADR_W_G - 1) of slv(DAT_W_G - 1 downto 0);
   signal RAM : t_ram; --! storage written by port A and read by ports A and B

   --! BRAM_G mapped to a RAM style string: true => "block", false => "distributed".
   --! NOTE(review): ite is not declared in this file; presumably the if-then-else helper
   --! from CslStdRtlPkg - confirm.
   constant RAM_STYLE_C : string := ite(BRAM_G, "block", "distributed");
   --! RAM style attribute applied to the RAM signal => "block"/"distributed"
   attribute ram_style : string;
   attribute ram_style of RAM : signal is RAM_STYLE_C;
---------------------------------------------------------------------------------------------------
begin

   --! Configuration check: PIPELINE_G = 0 generates asynchronous read outputs, which the
   --! block RAM primitive cannot provide. The condition only depends on generics, so it is
   --! evaluated once. Severity is warning (not failure) so that designs which already use
   --! this combination keep elaborating.
   a_BRAM_ASYNC_CHECK : assert not (BRAM_G and PIPELINE_G = 0)
      report "CslDpRAM: BRAM_G = true together with PIPELINE_G = 0 requests an asynchronous " &
             "read, which block RAM cannot implement. Use PIPELINE_G >= 1 or BRAM_G = false."
      severity warning;

   --! @brief RAM write process
   --! @details Stores dina at address addra on the rising edge of clka while wea is high.
   --! There is no reset branch: rsta does not clear the RAM contents.
   --! @param[in]  clka, wea, addra, dina
   --! @param[out] RAM
   p_RAMA : process(clka)
   begin
      if rising_edge(clka) then
         if wea = '1' then --! write enable
            RAM(to_integer(unsigned(addra))) <= dina after TPD_G;
         end if;
      end if;
   end process p_RAMA;

   --! write-first mode generated
   GEN_WRITE_FIRST : if WRITE_FIRST_G generate
      --! PIPELINE_G is 1 or more, synchronous output data is generated
      GEN_SYNC_OUT : if PIPELINE_G > 0 generate
         signal regOutA : slv(DAT_W_G - 1 downto 0); --! first output register of port A
         signal regOutB : slv(DAT_W_G - 1 downto 0); --! first output register of port B
      begin
         --! @brief RAM read A process (write-first)
         --! @details Registers the port A read data on the rising edge of clka.
         --! Priority: synchronous reset, then write bypass, then RAM read.
         --! @param[in]  clka, rsta, wea, addra, dina, RAM
         --! @param[out] regOutA
         p_SYNC_OUTA : process(clka)
         begin
            if rising_edge(clka) then
               if rsta = '1' then
                  --! synchronous output reset
                  regOutA <= (others => '0') after TPD_G;
               elsif wea = '1' then
                  --! write-first: while writing, the output takes the data being written
                  regOutA <= dina after TPD_G;
               else
                  --! no write in progress: read the stored word
                  regOutA <= RAM(to_integer(unsigned(addra))) after TPD_G;
               end if;
            end if;
         end process p_SYNC_OUTA;

         --! @brief RAM read B process
         --! @details Registers the port B read data on the rising edge of clkb.
         --! Port B has no write path, so there is no write bypass here.
         --! @param[in]  clkb, rstb, addrb, RAM
         --! @param[out] regOutB
         p_SYNC_OUTB : process(clkb)
         begin
            if rising_edge(clkb) then
               if rstb = '1' then
                  --! synchronous output reset
                  regOutB <= (others => '0') after TPD_G;
               else
                  --! read data from RAM
                  regOutB <= RAM(to_integer(unsigned(addrb))) after TPD_G;
               end if;
            end if;
         end process p_SYNC_OUTB;

         --! Output data passed through the additional output stages.
         --! The condition is always true inside GEN_SYNC_OUT; the generate level is kept so
         --! that the hierarchical paths of the instances below stay unchanged.
         --! NOTE(review): CslSyncVec is defined elsewhere; presumably it delays sig_i by
         --! DEPTH_G clock cycles (and passes it through for DEPTH_G = 0) - confirm.
         GEN_OUT_REG : if PIPELINE_G > 0 generate
            uPipelineA : entity work.CslSyncVec
               generic map(
                  TPD_G   => TPD_G,
                  WIDTH_G => DAT_W_G,        --! input/output data width
                  DEPTH_G => PIPELINE_G - 1  --! remaining stages after regOutA
               )
               port map(
                  clk_i => clka,     --! port A clock
                  rst_i => '0',      --! reset tied inactive; rsta only acts on regOutA
                  sig_i => regOutA,  --! registered port A read data
                  sig_o => douta);   --! port A data output

            uPipelineB : entity work.CslSyncVec
               generic map(
                  TPD_G   => TPD_G,
                  WIDTH_G => DAT_W_G,        --! input/output data width
                  DEPTH_G => PIPELINE_G - 1  --! remaining stages after regOutB
               )
               port map(
                  clk_i => clkb,     --! port B clock
                  rst_i => '0',      --! reset tied inactive; rstb only acts on regOutB
                  sig_i => regOutB,  --! registered port B read data
                  sig_o => doutb);   --! port B data output
         end generate GEN_OUT_REG;
      end generate GEN_SYNC_OUT;

      --! PIPELINE_G is 0 (no clock cycle delay), asynchronous output data is generated.
      --! The addressed word is driven combinationally; an active reset forces the output to
      --! all zeros. There is no dina bypass in this branch.
      GEN_ASYNC_OUT : if PIPELINE_G = 0 generate
         douta <= RAM(to_integer(unsigned(addra))) when rsta = '0'
                  else (others => '0');
         doutb <= RAM(to_integer(unsigned(addrb))) when rstb = '0'
                  else (others => '0');
      end generate GEN_ASYNC_OUT;
   end generate GEN_WRITE_FIRST;

   --! read-first mode generated
   GEN_READ_FIRST : if not(WRITE_FIRST_G) generate
      --! PIPELINE_G is 1 or more, synchronous output data is generated
      GEN_SYNC_OUT : if PIPELINE_G > 0 generate
         signal regOutA : slv(DAT_W_G - 1 downto 0); --! first output register of port A
         signal regOutB : slv(DAT_W_G - 1 downto 0); --! first output register of port B
      begin
         --! @brief RAM read A process (read-first)
         --! @details Registers the port A read data on the rising edge of clka.
         --! wea is not examined here: the word read from the RAM signal is registered
         --! even while a write to the same address is in progress.
         --! @param[in]  clka, rsta, addra, RAM
         --! @param[out] regOutA
         p_SYNC_OUTA : process(clka)
         begin
            if rising_edge(clka) then
               if rsta = '1' then
                  --! synchronous output reset
                  regOutA <= (others => '0') after TPD_G;
               else
                  --! read data from RAM
                  regOutA <= RAM(to_integer(unsigned(addra))) after TPD_G;
               end if;
            end if;
         end process p_SYNC_OUTA;

         --! @brief RAM read B process
         --! @details Registers the port B read data on the rising edge of clkb.
         --! @param[in]  clkb, rstb, addrb, RAM
         --! @param[out] regOutB
         p_SYNC_OUTB : process(clkb)
         begin
            if rising_edge(clkb) then
               if rstb = '1' then
                  --! synchronous output reset
                  regOutB <= (others => '0') after TPD_G;
               else
                  --! read data from RAM
                  regOutB <= RAM(to_integer(unsigned(addrb))) after TPD_G;
               end if;
            end if;
         end process p_SYNC_OUTB;

         --! Output data passed through the additional output stages.
         --! The condition is always true inside GEN_SYNC_OUT; the generate level is kept so
         --! that the hierarchical paths of the instances below stay unchanged.
         --! NOTE(review): CslSyncVec is defined elsewhere; presumably it delays sig_i by
         --! DEPTH_G clock cycles (and passes it through for DEPTH_G = 0) - confirm.
         GEN_OUT_REG : if PIPELINE_G > 0 generate
            uPipelineA : entity work.CslSyncVec
               generic map(
                  TPD_G   => TPD_G,
                  WIDTH_G => DAT_W_G,        --! input/output data width
                  DEPTH_G => PIPELINE_G - 1  --! remaining stages after regOutA
               )
               port map(
                  clk_i => clka,     --! port A clock
                  rst_i => '0',      --! reset tied inactive; rsta only acts on regOutA
                  sig_i => regOutA,  --! registered port A read data
                  sig_o => douta);   --! port A data output

            uPipelineB : entity work.CslSyncVec
               generic map(
                  TPD_G   => TPD_G,
                  WIDTH_G => DAT_W_G,        --! input/output data width
                  DEPTH_G => PIPELINE_G - 1  --! remaining stages after regOutB
               )
               port map(
                  clk_i => clkb,     --! port B clock
                  rst_i => '0',      --! reset tied inactive; rstb only acts on regOutB
                  sig_i => regOutB,  --! registered port B read data
                  sig_o => doutb);   --! port B data output
         end generate GEN_OUT_REG;
      end generate GEN_SYNC_OUT;

      --! PIPELINE_G is 0 (no clock cycle delay), asynchronous output data is generated.
      --! The addressed word is driven combinationally; an active reset forces the output to
      --! all zeros.
      GEN_ASYNC_OUT : if PIPELINE_G = 0 generate
         douta <= RAM(to_integer(unsigned(addra))) when rsta = '0'
                  else (others => '0');
         doutb <= RAM(to_integer(unsigned(addrb))) when rstb = '0'
                  else (others => '0');
      end generate GEN_ASYNC_OUT;
   end generate GEN_READ_FIRST;

end rtl;
---------------------------------------------------------------------------------------------------