Speed up from 25 minutes to ~6 minutes

This commit is contained in:
blurbdust 2026-02-09 10:58:24 -06:00
parent 0205c3d5c5
commit a91dd9f628
4 changed files with 197 additions and 32 deletions

View File

@ -10,6 +10,7 @@
* 0x1 = Write word to flash (addr + data)
* 0x2 = Read word from flash (addr), returns data
* 0x3 = NOP / get status
* 0x4 = Burst write (addr + count + N×data words)
*/
module bpiOverJtag_core (
@ -46,21 +47,25 @@ wire rst = (capture & sel);
wire start_header = (tdi & shift & sel);
/* State machine */
localparam IDLE = 3'd0,
RECV_CMD = 3'd1,
RECV_ADDR = 3'd2,
RECV_DATA = 3'd3,
EXEC = 3'd4,
SEND_DATA = 3'd5,
DONE = 3'd6;
localparam IDLE = 4'd0,
RECV_CMD = 4'd1,
RECV_ADDR = 4'd2,
RECV_DATA = 4'd3,
EXEC = 4'd4,
SEND_DATA = 4'd5,
DONE = 4'd6,
BURST_RECV_CNT = 4'd7,
BURST_DATA = 4'd8,
BURST_EXEC = 4'd9;
reg [2:0] state, state_d;
reg [3:0] state, state_d;
reg [5:0] bit_cnt, bit_cnt_d;
reg [3:0] cmd_reg, cmd_reg_d;
reg [24:0] addr_reg, addr_reg_d;
reg [15:0] wr_data_reg, wr_data_reg_d;
reg [15:0] rd_data_reg, rd_data_reg_d;
reg [7:0] wait_cnt, wait_cnt_d;
reg [15:0] burst_cnt, burst_cnt_d;
/* Data bus control */
reg dq_oe;
@ -71,9 +76,10 @@ assign bpi_dq = dq_oe ? dq_out : 16'hzzzz;
assign tdo = rd_data_reg[0];
/* Command codes */
localparam CMD_WRITE = 4'h1,
CMD_READ = 4'h2,
CMD_NOP = 4'h3;
localparam CMD_WRITE = 4'h1,
CMD_READ = 4'h2,
CMD_NOP = 4'h3,
CMD_BURST_WRITE = 4'h4;
/* Next state logic */
always @(*) begin
@ -84,6 +90,7 @@ always @(*) begin
wr_data_reg_d = wr_data_reg;
rd_data_reg_d = rd_data_reg;
wait_cnt_d = wait_cnt;
burst_cnt_d = burst_cnt;
case (state)
IDLE: begin
@ -108,6 +115,9 @@ always @(*) begin
if (cmd_reg == CMD_WRITE) begin
bit_cnt_d = 15; /* 16 bits for data */
state_d = RECV_DATA;
end else if (cmd_reg == CMD_BURST_WRITE) begin
bit_cnt_d = 15; /* 16 bits for burst count */
state_d = BURST_RECV_CNT;
end else begin
wait_cnt_d = 8'd20; /* Wait cycles for read */
state_d = EXEC;
@ -143,6 +153,38 @@ always @(*) begin
state_d = DONE;
end
BURST_RECV_CNT: begin
burst_cnt_d = {tdi, burst_cnt[15:1]};
bit_cnt_d = bit_cnt - 1'b1;
if (bit_cnt == 0) begin
bit_cnt_d = 15;
state_d = BURST_DATA;
end
end
BURST_DATA: begin
wr_data_reg_d = {tdi, wr_data_reg[15:1]};
bit_cnt_d = bit_cnt - 1'b1;
if (bit_cnt == 0) begin
wait_cnt_d = 8'd20;
state_d = BURST_EXEC;
end
end
BURST_EXEC: begin
wait_cnt_d = wait_cnt - 1'b1;
if (wait_cnt == 0) begin
burst_cnt_d = burst_cnt - 1'b1;
if (burst_cnt == 16'd1) begin
state_d = DONE;
end else begin
addr_reg_d = addr_reg + 1'b1;
bit_cnt_d = 15;
state_d = BURST_DATA;
end
end
end
DONE: begin
/* Stay here until reset */
end
@ -167,6 +209,7 @@ always @(posedge drck) begin
wr_data_reg <= wr_data_reg_d;
rd_data_reg <= rd_data_reg_d;
wait_cnt <= wait_cnt_d;
burst_cnt <= burst_cnt_d;
end
/* Address output */
@ -175,6 +218,8 @@ always @(posedge drck or posedge rst) begin
bpi_addr <= 25'd0;
else if (state == RECV_ADDR && bit_cnt == 0)
bpi_addr <= {tdi, addr_reg[24:1]};
else if (state == BURST_DATA && bit_cnt == 0)
bpi_addr <= addr_reg;
end
/* BPI Flash control signals */
@ -202,6 +247,14 @@ always @(posedge drck or posedge rst) begin
dq_out <= wr_data_reg;
end
end
BURST_EXEC: begin
bpi_ce_n <= 1'b0;
bpi_adv_n <= 1'b0;
bpi_oe_n <= 1'b1;
bpi_we_n <= (wait_cnt > 8'd5 && wait_cnt < 8'd15) ? 1'b0 : 1'b1;
dq_oe <= 1'b1;
dq_out <= wr_data_reg;
end
default: begin
bpi_ce_n <= 1'b1;
bpi_oe_n <= 1'b1;
@ -219,7 +272,7 @@ end
wire ver_rst = (ver_cap & ver_sel);
wire ver_start = (ver_tdi & ver_shift & ver_sel);
localparam VER_VALUE = 40'h30_31_2E_30_30; // "01.00"
localparam VER_VALUE = 40'h30_32_2E_30_30; // "02.00"
reg [6:0] ver_cnt, ver_cnt_d;
reg [39:0] ver_shft, ver_shft_d;

View File

@ -9,6 +9,7 @@
#include <unistd.h>
#include <cstring>
#include <stdexcept>
#include <vector>
#include "display.hpp"
#include "progressBar.hpp"
@ -29,7 +30,8 @@ static inline uint8_t reverseByte(uint8_t b)
BPIFlash::BPIFlash(Jtag *jtag, int8_t verbose)
: _jtag(jtag), _verbose(verbose), _irlen(6),
_capacity(0), _block_size(256 * 1024),
_manufacturer_id(0), _device_id(0)
_manufacturer_id(0), _device_id(0),
_has_burst(false)
{
}
@ -46,6 +48,7 @@ BPIFlash::~BPIFlash()
* 0x1 = Write word
* 0x2 = Read word
* 0x3 = NOP
* 0x4 = Burst write (addr + count + N×data words)
*/
uint16_t BPIFlash::bpi_read(uint32_t word_addr)
@ -133,6 +136,94 @@ void BPIFlash::bpi_write(uint32_t word_addr, uint16_t data)
_jtag->flush();
}
/*
 * Queue one single-word write frame on the JTAG data register.
 *
 * Builds the same frame layout as bpi_write() but neither selects the
 * instruction register nor flushes: the caller is expected to shift the
 * IR once before a batch of these calls and flush once afterwards.
 *
 * word_addr: flash word address; only the low 25 bits are transmitted.
 * data:      16-bit word to write.
 */
void BPIFlash::bpi_write_no_flush(uint32_t word_addr, uint16_t data)
{
	/* Frame, LSB first: start(1) + cmd(4) + addr(25) + data(16) + 20
	 * trailing zero bits (presumably the bridge's write wait cycles —
	 * TODO confirm against bpiOverJtag_core.v).
	 */
	constexpr int kFrameBits = 1 + 4 + 25 + 16 + 20;
	constexpr int kFrameBytes = (kFrameBits + 7) / 8;

	/* Zero-initialised so the trailing bits and the last byte stay 0. */
	uint8_t frame[kFrameBytes] = {0};

	const uint64_t fields =
		static_cast<uint64_t>(1)                                 /* start bit, bit 0 */
		| (static_cast<uint64_t>(CMD_WRITE) << 1)                /* cmd,  bits [4:1]   */
		| (static_cast<uint64_t>(word_addr & 0x1FFFFFF) << 5)    /* addr, bits [29:5]  */
		| (static_cast<uint64_t>(data) << 30);                   /* data, bits [45:30] */

	/* Serialise the 64-bit field word little-endian into the frame. */
	for (unsigned idx = 0; idx < sizeof(fields); ++idx)
		frame[idx] = static_cast<uint8_t>(fields >> (8 * idx));

	_jtag->shiftDR(frame, NULL, kFrameBits);
}
/*
 * Burst-write `count` consecutive 16-bit words starting at `word_addr`.
 *
 * Each burst is sent as one DR shift:
 *   start(1) + cmd(4) + addr(25) + count(16) + N x (data(16) + pad(21))
 * all packed LSB-first. The 21 padding bits per word cover the bridge's
 * BURST_EXEC state (wait counter 20 down to 0) before it samples the
 * next data word.
 *
 * The count field is only 16 bits wide, so a request larger than 0xFFFF
 * words is split into several bursts (each with its own IR select, DR
 * shift and flush). This keeps the header count equal to the number of
 * words actually packed and keeps the bit-length arithmetic within 32 bits.
 *
 * word_addr: first flash word address; only the low 25 bits are sent.
 * data:      pointer to `count` words to write.
 * count:     number of words; 0 is a no-op.
 */
void BPIFlash::bpi_burst_write(uint32_t word_addr, const uint16_t *data,
		uint32_t count)
{
	if (count == 0)
		return;

	const uint32_t max_words_per_burst = 0xFFFF;     /* 16-bit count field */
	const uint32_t header_bits = 1 + 4 + 25 + 16;    /* 46 */
	const uint32_t per_word_bits = 16 + 21;          /* 37 */

	uint8_t user1[] = {0x02};

	while (count > 0) {
		const uint32_t burst_words =
			(count > max_words_per_burst) ? max_words_per_burst : count;
		const uint32_t total_bits = header_bits + burst_words * per_word_bits;
		const uint32_t total_bytes = (total_bits + 7) / 8;

		std::vector<uint8_t> tx(total_bytes, 0);
		uint32_t pos = 0;

		/* Append the low `width` bits of `value`, LSB first, at `pos`. */
		auto put_field = [&](uint32_t value, uint32_t width) {
			for (uint32_t i = 0; i < width; i++, pos++) {
				if (value & (1u << i))
					tx[pos / 8] |= static_cast<uint8_t>(1u << (pos % 8));
			}
		};

		put_field(1, 1);                 /* start bit */
		put_field(CMD_BURST_WRITE, 4);   /* command */
		put_field(word_addr, 25);        /* start address */
		put_field(burst_words, 16);      /* word count for this burst */

		for (uint32_t w = 0; w < burst_words; w++) {
			put_field(data[w], 16);
			pos += 21;                   /* padding bits stay zero */
		}

		_jtag->shiftIR(user1, NULL, _irlen);
		_jtag->shiftDR(tx.data(), NULL, total_bits);
		_jtag->flush();

		/* Advance to the next burst, if any words remain. */
		word_addr += burst_words;
		data += burst_words;
		count -= burst_words;
	}
}
bool BPIFlash::detect()
{
printInfo("Detecting BPI flash...");
@ -189,6 +280,11 @@ bool BPIFlash::detect()
_block_size = 256 * 1024;
printInfo("Flash capacity: 64 MB (512 Mbit)");
/* Enable burst write — assumes v02.00+ JTAG bitstream is loaded.
* Future: could auto-detect via USER4 version readback.
*/
_has_burst = true;
return true;
}
@ -345,7 +441,6 @@ bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
}
/* Program data using buffered programming (0x00E9)
* MT28GU512AAA has 512-word buffer, we use 32 words for reliability
* Sequence: Setup(0xE9) -> WordCount(N-1) -> N data words -> Confirm(0xD0)
*/
printInfo("Programming (buffered mode)...");
@ -374,9 +469,6 @@ bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
last_block = current_block;
}
/* Clear any pending status before new buffered program */
bpi_write(0, FLASH_CMD_CLEAR_STATUS);
/* Calculate how many words to write in this buffer */
uint32_t remaining_bytes = len - offset;
uint32_t chunk_bytes = (remaining_bytes > BUFFER_BYTES) ? BUFFER_BYTES : remaining_bytes;
@ -395,26 +487,36 @@ bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
printInfo(buf);
}
/* Buffered Program Setup - sent to block/colony base address */
bpi_write(block_word_addr, FLASH_CMD_BUFFERED_PRG);
usleep(10);
/* Write word count (N-1) - sent to block address per datasheet */
bpi_write(block_word_addr, chunk_words - 1);
/* Write data words for BPI x16 boot.
* Two transformations (same as Vivado write_cfgmem -interface BPIx16):
* 1. Bit reversal within each byte: FPGA D00=MSBit, flash DQ[0]=LSBit
* 2. Byte swap: first bitstream byte upper flash byte D[15:8]
*/
std::vector<uint16_t> word_buf(chunk_words);
for (uint32_t w = 0; w < chunk_words; w++) {
uint32_t data_offset = offset + w * 2;
uint8_t b0 = data[data_offset];
uint8_t b1 = 0xFF; /* pad with 0xFF if odd length */
if (data_offset + 1 < len)
b1 = data[data_offset + 1];
uint16_t word = (reverseByte(b0) << 8) | reverseByte(b1);
bpi_write(word_addr + w, word);
word_buf[w] = (reverseByte(b0) << 8) | reverseByte(b1);
}
/* Clear status, then Buffered Program Setup and word count (N-1) - both sent to block/colony base address */
bpi_write(0, FLASH_CMD_CLEAR_STATUS);
bpi_write(block_word_addr, FLASH_CMD_BUFFERED_PRG);
bpi_write(block_word_addr, chunk_words - 1);
if (_has_burst) {
bpi_burst_write(word_addr, word_buf.data(), chunk_words);
} else {
/* Software-only fallback: one IR, no per-word flush */
uint8_t user1[] = {0x02};
_jtag->shiftIR(user1, NULL, _irlen);
for (uint32_t w = 0; w < chunk_words; w++) {
bpi_write_no_flush(word_addr + w, word_buf[w]);
}
_jtag->flush();
}
/* Confirm - sent to block address */
@ -429,9 +531,6 @@ bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
return false;
}
/* Small delay before next buffer operation */
usleep(100);
offset += chunk_words * 2;
if ((offset & 0xFFF) == 0 || offset >= len)

View File

@ -70,9 +70,10 @@ class BPIFlash {
private:
/* BPI bridge command codes (match bpiOverJtag_core.v) */
static const uint8_t CMD_WRITE = 0x1;
static const uint8_t CMD_READ = 0x2;
static const uint8_t CMD_NOP = 0x3;
static const uint8_t CMD_WRITE = 0x1;
static const uint8_t CMD_READ = 0x2;
static const uint8_t CMD_NOP = 0x3;
static const uint8_t CMD_BURST_WRITE = 0x4;
/* Intel CFI flash commands */
static const uint16_t FLASH_CMD_READ_ARRAY = 0x00FF;
@ -104,6 +105,17 @@ class BPIFlash {
*/
void bpi_write(uint32_t word_addr, uint16_t data);
/*!
* \brief Write a 16-bit word without IR shift or flush (for batched writes)
*/
void bpi_write_no_flush(uint32_t word_addr, uint16_t data);
/*!
* \brief Burst write multiple 16-bit words in a single DR shift
*/
void bpi_burst_write(uint32_t word_addr, const uint16_t *data,
uint32_t count);
/*!
* \brief Wait for operation to complete
* \return true if completed successfully
@ -122,6 +134,7 @@ class BPIFlash {
uint32_t _block_size;
uint16_t _manufacturer_id;
uint16_t _device_id;
bool _has_burst;
};
#endif // SRC_BPIFLASH_HPP_