diff --git a/bpiOverJtag/bpiOverJtag_core.v b/bpiOverJtag/bpiOverJtag_core.v
index 3c76adc..251af03 100644
--- a/bpiOverJtag/bpiOverJtag_core.v
+++ b/bpiOverJtag/bpiOverJtag_core.v
@@ -10,6 +10,7 @@
  * 0x1 = Write word to flash (addr + data)
  * 0x2 = Read word from flash (addr), returns data
  * 0x3 = NOP / get status
+ * 0x4 = Burst write (addr + count + N×data words)
  */
 
 module bpiOverJtag_core (
@@ -46,21 +47,25 @@ wire rst = (capture & sel);
 wire start_header = (tdi & shift & sel);
 
 /* State machine */
-localparam IDLE      = 3'd0,
-           RECV_CMD  = 3'd1,
-           RECV_ADDR = 3'd2,
-           RECV_DATA = 3'd3,
-           EXEC      = 3'd4,
-           SEND_DATA = 3'd5,
-           DONE      = 3'd6;
+localparam IDLE           = 4'd0,
+           RECV_CMD       = 4'd1,
+           RECV_ADDR      = 4'd2,
+           RECV_DATA      = 4'd3,
+           EXEC           = 4'd4,
+           SEND_DATA      = 4'd5,
+           DONE           = 4'd6,
+           BURST_RECV_CNT = 4'd7,
+           BURST_DATA     = 4'd8,
+           BURST_EXEC     = 4'd9;
 
-reg [2:0] state, state_d;
+reg [3:0] state, state_d;
 reg [5:0] bit_cnt, bit_cnt_d;
 reg [3:0] cmd_reg, cmd_reg_d;
 reg [24:0] addr_reg, addr_reg_d;
 reg [15:0] wr_data_reg, wr_data_reg_d;
 reg [15:0] rd_data_reg, rd_data_reg_d;
 reg [7:0] wait_cnt, wait_cnt_d;
+reg [15:0] burst_cnt, burst_cnt_d;
 
 /* Data bus control */
 reg dq_oe;
@@ -71,9 +76,10 @@ assign bpi_dq = dq_oe ? dq_out : 16'hzzzz;
 assign tdo = rd_data_reg[0];
 
 /* Command codes */
-localparam CMD_WRITE = 4'h1,
-           CMD_READ  = 4'h2,
-           CMD_NOP   = 4'h3;
+localparam CMD_WRITE       = 4'h1,
+           CMD_READ        = 4'h2,
+           CMD_NOP         = 4'h3,
+           CMD_BURST_WRITE = 4'h4;
 
 /* Next state logic */
 always @(*) begin
@@ -84,6 +90,7 @@ always @(*) begin
     wr_data_reg_d = wr_data_reg;
     rd_data_reg_d = rd_data_reg;
     wait_cnt_d = wait_cnt;
+    burst_cnt_d = burst_cnt;
 
     case (state)
         IDLE: begin
@@ -108,6 +115,9 @@ always @(*) begin
                 if (cmd_reg == CMD_WRITE) begin
                     bit_cnt_d = 15; /* 16 bits for data */
                     state_d = RECV_DATA;
+                end else if (cmd_reg == CMD_BURST_WRITE) begin
+                    bit_cnt_d = 15; /* 16 bits for burst count */
+                    state_d = BURST_RECV_CNT;
                 end else begin
                     wait_cnt_d = 8'd20; /* Wait cycles for read */
                     state_d = EXEC;
@@ -143,6 +153,38 @@ always @(*) begin
             state_d = DONE;
         end
 
+        BURST_RECV_CNT: begin
+            burst_cnt_d = {tdi, burst_cnt[15:1]};
+            bit_cnt_d = bit_cnt - 1'b1;
+            if (bit_cnt == 0) begin
+                bit_cnt_d = 15;
+                state_d = BURST_DATA;
+            end
+        end
+
+        BURST_DATA: begin
+            wr_data_reg_d = {tdi, wr_data_reg[15:1]};
+            bit_cnt_d = bit_cnt - 1'b1;
+            if (bit_cnt == 0) begin
+                wait_cnt_d = 8'd20;
+                state_d = BURST_EXEC;
+            end
+        end
+
+        BURST_EXEC: begin
+            wait_cnt_d = wait_cnt - 1'b1;
+            if (wait_cnt == 0) begin
+                burst_cnt_d = burst_cnt - 1'b1;
+                if (burst_cnt == 16'd1) begin
+                    state_d = DONE;
+                end else begin
+                    addr_reg_d = addr_reg + 1'b1;
+                    bit_cnt_d = 15;
+                    state_d = BURST_DATA;
+                end
+            end
+        end
+
         DONE: begin
             /* Stay here until reset */
         end
@@ -167,6 +209,7 @@ always @(posedge drck) begin
     wr_data_reg <= wr_data_reg_d;
     rd_data_reg <= rd_data_reg_d;
     wait_cnt <= wait_cnt_d;
+    burst_cnt <= burst_cnt_d;
 end
 
 /* Address output */
@@ -175,6 +218,8 @@ always @(posedge drck or posedge rst) begin
         bpi_addr <= 25'd0;
     else if (state == RECV_ADDR && bit_cnt == 0)
         bpi_addr <= {tdi, addr_reg[24:1]};
+    else if (state == BURST_DATA && bit_cnt == 0)
+        bpi_addr <= addr_reg;
 end
 
 /* BPI Flash control signals */
@@ -202,6 +247,14 @@ always @(posedge drck or posedge rst) begin
                     dq_out <= wr_data_reg;
                 end
             end
+            BURST_EXEC: begin
+                bpi_ce_n <= 1'b0;
+                bpi_adv_n <= 1'b0;
+                bpi_oe_n <= 1'b1;
+                bpi_we_n <= (wait_cnt > 8'd5 && wait_cnt < 8'd15) ? 1'b0 : 1'b1;
+                dq_oe <= 1'b1;
+                dq_out <= wr_data_reg;
+            end
             default: begin
                 bpi_ce_n <= 1'b1;
                 bpi_oe_n <= 1'b1;
@@ -219,7 +272,7 @@ end
 wire ver_rst = (ver_cap & ver_sel);
 wire ver_start = (ver_tdi & ver_shift & ver_sel);
 
-localparam VER_VALUE = 40'h30_31_2E_30_30; // "01.00"
+localparam VER_VALUE = 40'h30_32_2E_30_30; // "02.00"
 
 reg [6:0] ver_cnt, ver_cnt_d;
 reg [39:0] ver_shft, ver_shft_d;
diff --git a/bpiOverJtag/bpiOverJtag_xc7k480tffg1156.bit.gz b/bpiOverJtag/bpiOverJtag_xc7k480tffg1156.bit.gz
index 82af67f..ad9bf93 100644
Binary files a/bpiOverJtag/bpiOverJtag_xc7k480tffg1156.bit.gz and b/bpiOverJtag/bpiOverJtag_xc7k480tffg1156.bit.gz differ
diff --git a/src/bpiFlash.cpp b/src/bpiFlash.cpp
index 2f710af..063bd01 100644
--- a/src/bpiFlash.cpp
+++ b/src/bpiFlash.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <vector>
 
 #include "display.hpp"
 #include "progressBar.hpp"
@@ -29,7 +30,8 @@ static inline uint8_t reverseByte(uint8_t b)
 BPIFlash::BPIFlash(Jtag *jtag, int8_t verbose)
 	: _jtag(jtag), _verbose(verbose), _irlen(6),
 	_capacity(0), _block_size(256 * 1024),
-	_manufacturer_id(0), _device_id(0)
+	_manufacturer_id(0), _device_id(0),
+	_has_burst(false)
 {
 }
 
@@ -46,6 +48,7 @@ BPIFlash::~BPIFlash()
  * 0x1 = Write word
  * 0x2 = Read word
  * 0x3 = NOP
+ * 0x4 = Burst write (addr + count + N×data words)
  */
 
 uint16_t BPIFlash::bpi_read(uint32_t word_addr)
@@ -133,6 +136,105 @@ void BPIFlash::bpi_write(uint32_t word_addr, uint16_t data)
 	_jtag->flush();
 }
 
+void BPIFlash::bpi_write_no_flush(uint32_t word_addr, uint16_t data)
+{
+	/* Same packet as bpi_write() but no shiftIR or flush —
+	 * caller sets IR once before the loop and flushes once after.
+	 */
+	const int total_bits = 1 + 4 + 25 + 16 + 20;
+	const int total_bytes = (total_bits + 7) / 8;
+
+	uint8_t tx[total_bytes];
+	memset(tx, 0, total_bytes);
+
+	uint64_t packet = 1; /* start bit */
+	packet |= ((uint64_t)CMD_WRITE) << 1; /* cmd at bits [4:1] */
+	packet |= ((uint64_t)(word_addr & 0x1FFFFFF)) << 5; /* addr at bits [29:5] */
+	packet |= ((uint64_t)data) << 30; /* data at bits [45:30] */
+
+	for (int i = 0; i < 8; i++) {
+		tx[i] = (packet >> (i * 8)) & 0xFF;
+	}
+
+	_jtag->shiftDR(tx, NULL, total_bits);
+}
+
+void BPIFlash::bpi_burst_write(uint32_t word_addr, const uint16_t *data,
+		uint32_t count)
+{
+	/* The bridge latches the burst length in a 16-bit register
+	 * (burst_cnt in bpiOverJtag_core.v). Issue larger requests as
+	 * several consecutive bursts rather than truncating the count.
+	 */
+	const uint32_t max_burst = 0xFFFF;
+	while (count > max_burst) {
+		bpi_burst_write(word_addr, data, max_burst);
+		word_addr += max_burst;
+		data += max_burst;
+		count -= max_burst;
+	}
+	if (count == 0)
+		return;
+
+	/* Burst packet: start(1) + cmd(4) + addr(25) + count(16) + N×(data(16) + pad(21))
+	 * Header: 46 bits. Per word: 37 bits.
+	 */
+	const uint32_t header_bits = 1 + 4 + 25 + 16; /* 46 */
+	const uint32_t per_word_bits = 16 + 21; /* 37: 20 exec cycles + 1 transition */
+	const uint32_t total_bits = header_bits + count * per_word_bits;
+	const uint32_t total_bytes = (total_bits + 7) / 8;
+
+	std::vector<uint8_t> tx(total_bytes, 0);
+
+	/* Helper to set a single bit in the tx buffer */
+	auto set_bit = [&](uint32_t bit_pos) {
+		tx[bit_pos / 8] |= (1 << (bit_pos % 8));
+	};
+
+	/* Pack header LSB-first */
+	uint32_t pos = 0;
+
+	/* start bit = 1 */
+	set_bit(pos);
+	pos++;
+
+	/* cmd = CMD_BURST_WRITE (4 bits) */
+	for (int i = 0; i < 4; i++) {
+		if (CMD_BURST_WRITE & (1 << i))
+			set_bit(pos);
+		pos++;
+	}
+
+	/* addr (25 bits) */
+	for (int i = 0; i < 25; i++) {
+		if (word_addr & (1u << i))
+			set_bit(pos);
+		pos++;
+	}
+
+	/* count (16 bits) */
+	for (int i = 0; i < 16; i++) {
+		if (count & (1u << i))
+			set_bit(pos);
+		pos++;
+	}
+
+	/* Pack each data word: 16 data bits + 21 padding bits */
+	for (uint32_t w = 0; w < count; w++) {
+		for (int i = 0; i < 16; i++) {
+			if (data[w] & (1 << i))
+				set_bit(pos);
+			pos++;
+		}
+		pos += 21; /* 20 exec cycles + 1 transition cycle */
+	}
+
+	uint8_t user1[] = {0x02};
+	_jtag->shiftIR(user1, NULL, _irlen);
+	_jtag->shiftDR(tx.data(), NULL, total_bits);
+	_jtag->flush();
+}
+
 bool BPIFlash::detect()
 {
 	printInfo("Detecting BPI flash...");
@@ -189,6 +291,11 @@ bool BPIFlash::detect()
 	_block_size = 256 * 1024;
 	printInfo("Flash capacity: 64 MB (512 Mbit)");
 
+	/* Enable burst write — assumes v02.00+ JTAG bitstream is loaded.
+	 * Future: could auto-detect via USER4 version readback.
+	 */
+	_has_burst = true;
+
 	return true;
 }
 
@@ -345,7 +452,6 @@ bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
 	}
 
 	/* Program data using buffered programming (0x00E9)
-	 * MT28GU512AAA has 512-word buffer, we use 32 words for reliability
 	 * Sequence: Setup(0xE9) -> WordCount(N-1) -> N data words -> Confirm(0xD0)
 	 */
 	printInfo("Programming (buffered mode)...");
@@ -374,9 +480,6 @@ bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
 			last_block = current_block;
 		}
 
-		/* Clear any pending status before new buffered program */
-		bpi_write(0, FLASH_CMD_CLEAR_STATUS);
-
 		/* Calculate how many words to write in this buffer */
 		uint32_t remaining_bytes = len - offset;
 		uint32_t chunk_bytes = (remaining_bytes > BUFFER_BYTES) ? BUFFER_BYTES : remaining_bytes;
@@ -395,26 +498,40 @@ bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
 			printInfo(buf);
 		}
 
-		/* Buffered Program Setup - sent to block/colony base address */
-		bpi_write(block_word_addr, FLASH_CMD_BUFFERED_PRG);
-		usleep(10);
-
-		/* Write word count (N-1) - sent to block address per datasheet */
-		bpi_write(block_word_addr, chunk_words - 1);
-
 		/* Write data words for BPI x16 boot.
 		 * Two transformations (same as Vivado write_cfgmem -interface BPIx16):
 		 * 1. Bit reversal within each byte: FPGA D00=MSBit, flash DQ[0]=LSBit
 		 * 2. Byte swap: first bitstream byte → upper flash byte D[15:8]
 		 */
+		std::vector<uint16_t> word_buf(chunk_words);
 		for (uint32_t w = 0; w < chunk_words; w++) {
 			uint32_t data_offset = offset + w * 2;
 			uint8_t b0 = data[data_offset];
 			uint8_t b1 = 0xFF; /* pad with 0xFF if odd length */
 			if (data_offset + 1 < len)
 				b1 = data[data_offset + 1];
-			uint16_t word = (reverseByte(b0) << 8) | reverseByte(b1);
-			bpi_write(word_addr + w, word);
+			word_buf[w] = (reverseByte(b0) << 8) | reverseByte(b1);
+		}
+
+		/* Clear any pending status before new buffered program */
+		bpi_write(0, FLASH_CMD_CLEAR_STATUS);
+
+		/* Buffered Program Setup - sent to block/colony base address */
+		bpi_write(block_word_addr, FLASH_CMD_BUFFERED_PRG);
+
+		/* Write word count (N-1) - sent to block address per datasheet */
+		bpi_write(block_word_addr, chunk_words - 1);
+
+		if (_has_burst) {
+			bpi_burst_write(word_addr, word_buf.data(), chunk_words);
+		} else {
+			/* Software-only fallback: one IR, no per-word flush */
+			uint8_t user1[] = {0x02};
+			_jtag->shiftIR(user1, NULL, _irlen);
+			for (uint32_t w = 0; w < chunk_words; w++) {
+				bpi_write_no_flush(word_addr + w, word_buf[w]);
+			}
+			_jtag->flush();
 		}
 
 		/* Confirm - sent to block address */
@@ -429,9 +546,6 @@ bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
 			return false;
 		}
 
-		/* Small delay before next buffer operation */
-		usleep(100);
-
 		offset += chunk_words * 2;
 
 		if ((offset & 0xFFF) == 0 || offset >= len)
diff --git a/src/bpiFlash.hpp b/src/bpiFlash.hpp
index 858807c..0b03f4c 100644
--- a/src/bpiFlash.hpp
+++ b/src/bpiFlash.hpp
@@ -70,9 +70,10 @@ class BPIFlash {
 
  private:
 	/* BPI bridge command codes (match bpiOverJtag_core.v) */
-	static const uint8_t CMD_WRITE = 0x1;
-	static const uint8_t CMD_READ = 0x2;
-	static const uint8_t CMD_NOP = 0x3;
+	static const uint8_t CMD_WRITE = 0x1;
+	static const uint8_t CMD_READ = 0x2;
+	static const uint8_t CMD_NOP = 0x3;
+	static const uint8_t CMD_BURST_WRITE = 0x4;
 
 	/* Intel CFI flash commands */
 	static const uint16_t FLASH_CMD_READ_ARRAY = 0x00FF;
@@ -104,6 +105,21 @@ class BPIFlash {
 	 */
 	void bpi_write(uint32_t word_addr, uint16_t data);
 
+	/*!
+	 * \brief Write a 16-bit word without IR shift or flush (for batched writes)
+	 */
+	void bpi_write_no_flush(uint32_t word_addr, uint16_t data);
+
+	/*!
+	 * \brief Burst write multiple 16-bit words in a single DR shift
+	 * \param[in] word_addr: address of the first word
+	 * \param[in] data: words written to consecutive word addresses
+	 * \param[in] count: number of words; requests above 0xFFFF (the
+	 *            bridge's 16-bit burst counter) are sent as several bursts
+	 */
+	void bpi_burst_write(uint32_t word_addr, const uint16_t *data,
+			uint32_t count);
+
 	/*!
 	 * \brief Wait for operation to complete
 	 * \return true if completed successfully
@@ -122,6 +138,7 @@ class BPIFlash {
 	uint32_t _block_size;
 	uint16_t _manufacturer_id;
 	uint16_t _device_id;
+	bool _has_burst;
 };
 
 #endif // SRC_BPIFLASH_HPP_