openFPGALoader/src/bpiFlash.cpp

582 lines
16 KiB
C++
Raw Normal View History

2026-02-09 16:28:42 +01:00
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright (C) 2024 openFPGALoader contributors
* BPI (Parallel NOR) Flash support via JTAG bridge
*/
#include "bpiFlash.hpp"
#include <unistd.h>
#include <cstring>
#include <stdexcept>
2026-02-09 17:58:24 +01:00
#include <vector>
2026-02-09 16:28:42 +01:00
#include "display.hpp"
#include "progressBar.hpp"
/* Bit-reverse a byte (MSB <-> LSB).
* Required for BPI x16: the FPGA's D00 pin is the MSBit of each config byte
* (AR#7112), but flash DQ[0] is the LSBit. write_cfgmem applies this
* transformation; we must do the same.
*/
static inline uint8_t reverseByte(uint8_t b)
{
b = ((b & 0xF0) >> 4) | ((b & 0x0F) << 4);
b = ((b & 0xCC) >> 2) | ((b & 0x33) << 2);
b = ((b & 0xAA) >> 1) | ((b & 0x55) << 1);
return b;
}
BPIFlash::BPIFlash(Jtag *jtag, int8_t verbose)
: _jtag(jtag), _verbose(verbose), _irlen(6),
_capacity(0), _block_size(256 * 1024),
2026-02-09 17:58:24 +01:00
_manufacturer_id(0), _device_id(0),
_has_burst(false)
2026-02-09 16:28:42 +01:00
{
}
BPIFlash::~BPIFlash()
{
}
/*
* Protocol: Single JTAG DR shift containing:
* TX: [start=1][cmd:4][addr:25][wr_data:16] = 46 bits
* RX: Data returned after execution delay
*
* Commands:
* 0x1 = Write word
* 0x2 = Read word
* 0x3 = NOP
2026-02-09 17:58:24 +01:00
* 0x4 = Burst write (addr + count + N×data words)
2026-02-09 16:28:42 +01:00
*/
uint16_t BPIFlash::bpi_read(uint32_t word_addr)
{
/* Build packet: start(1) + cmd(4) + addr(25) + padding for read response */
/* Extra bit needed due to pipeline delay in Verilog (data at offset 51, not 50) */
const int total_bits = 1 + 4 + 25 + 20 + 16 + 1; /* 67 bits total */
const int total_bytes = (total_bits + 7) / 8;
uint8_t tx[total_bytes];
uint8_t rx[total_bytes];
memset(tx, 0, total_bytes);
memset(rx, 0, total_bytes);
/* Pack: start=1, cmd=2 (read), addr (LSB first) */
uint64_t packet = 1; /* start bit */
packet |= ((uint64_t)CMD_READ) << 1; /* cmd at bits [4:1] */
packet |= ((uint64_t)(word_addr & 0x1FFFFFF)) << 5; /* addr at bits [29:5] */
/* Convert to bytes (LSB first for JTAG) */
for (int i = 0; i < 5; i++) {
tx[i] = (packet >> (i * 8)) & 0xFF;
}
/* Select USER1 instruction */
uint8_t user1[] = {0x02};
_jtag->shiftIR(user1, NULL, _irlen);
/* Shift data and get response */
_jtag->shiftDR(tx, rx, total_bits);
_jtag->flush();
/* Extract read data from response - it appears after the command execution */
/* Data starts at bit 51 (after start+cmd+addr+exec_delay+1 pipeline delay) */
int data_offset = 51;
uint16_t data = 0;
for (int i = 0; i < 16; i++) {
int bit_pos = data_offset + i;
int byte_idx = bit_pos / 8;
int bit_idx = bit_pos % 8;
if (rx[byte_idx] & (1 << bit_idx))
data |= (1 << i);
}
return data;
}
void BPIFlash::bpi_write(uint32_t word_addr, uint16_t data)
{
/* Build packet: start(1) + cmd(4) + addr(25) + data(16) + exec delay */
const int total_bits = 1 + 4 + 25 + 16 + 20;
const int total_bytes = (total_bits + 7) / 8;
uint8_t tx[total_bytes];
memset(tx, 0, total_bytes);
/* Pack: start=1, cmd=1 (write), addr, data (all LSB first) */
uint64_t packet = 1; /* start bit */
packet |= ((uint64_t)CMD_WRITE) << 1; /* cmd at bits [4:1] */
packet |= ((uint64_t)(word_addr & 0x1FFFFFF)) << 5; /* addr at bits [29:5] */
packet |= ((uint64_t)data) << 30; /* data at bits [45:30] */
/* Convert to bytes (LSB first for JTAG) */
for (int i = 0; i < 8; i++) {
tx[i] = (packet >> (i * 8)) & 0xFF;
}
if (_verbose > 1) {
char buf[256];
snprintf(buf, sizeof(buf), "bpi_write(0x%06x, 0x%04x) TX:", word_addr, data);
std::string msg = buf;
for (int i = 0; i < total_bytes; i++) {
snprintf(buf, sizeof(buf), " %02x", tx[i]);
msg += buf;
}
printInfo(msg);
}
/* Select USER1 instruction */
uint8_t user1[] = {0x02};
_jtag->shiftIR(user1, NULL, _irlen);
/* Shift data */
_jtag->shiftDR(tx, NULL, total_bits);
_jtag->flush();
}
2026-02-09 17:58:24 +01:00
void BPIFlash::bpi_write_no_flush(uint32_t word_addr, uint16_t data)
{
/* Same packet as bpi_write() but no shiftIR or flush —
* caller sets IR once before the loop and flushes once after.
*/
const int total_bits = 1 + 4 + 25 + 16 + 20;
const int total_bytes = (total_bits + 7) / 8;
uint8_t tx[total_bytes];
memset(tx, 0, total_bytes);
uint64_t packet = 1; /* start bit */
packet |= ((uint64_t)CMD_WRITE) << 1; /* cmd at bits [4:1] */
packet |= ((uint64_t)(word_addr & 0x1FFFFFF)) << 5; /* addr at bits [29:5] */
packet |= ((uint64_t)data) << 30; /* data at bits [45:30] */
for (int i = 0; i < 8; i++) {
tx[i] = (packet >> (i * 8)) & 0xFF;
}
_jtag->shiftDR(tx, NULL, total_bits);
}
void BPIFlash::bpi_burst_write(uint32_t word_addr, const uint16_t *data,
uint32_t count)
{
if (count == 0)
return;
/* Burst packet: start(1) + cmd(4) + addr(25) + count(16) + N×(data(16) + pad(21))
* Header: 46 bits. Per word: 37 bits.
*/
const uint32_t header_bits = 1 + 4 + 25 + 16; /* 46 */
const uint32_t per_word_bits = 16 + 21; /* 37: 20 exec cycles + 1 transition */
const uint32_t total_bits = header_bits + count * per_word_bits;
const uint32_t total_bytes = (total_bits + 7) / 8;
std::vector<uint8_t> tx(total_bytes, 0);
/* Helper to set a single bit in the tx buffer */
auto set_bit = [&](uint32_t bit_pos) {
tx[bit_pos / 8] |= (1 << (bit_pos % 8));
};
/* Pack header LSB-first */
uint32_t pos = 0;
/* start bit = 1 */
set_bit(pos);
pos++;
/* cmd = CMD_BURST_WRITE (4 bits) */
for (int i = 0; i < 4; i++) {
if (CMD_BURST_WRITE & (1 << i))
set_bit(pos);
pos++;
}
/* addr (25 bits) */
for (int i = 0; i < 25; i++) {
if (word_addr & (1u << i))
set_bit(pos);
pos++;
}
/* count (16 bits) */
for (int i = 0; i < 16; i++) {
if (count & (1u << i))
set_bit(pos);
pos++;
}
/* Pack each data word: 16 data bits + 21 padding bits */
for (uint32_t w = 0; w < count; w++) {
for (int i = 0; i < 16; i++) {
if (data[w] & (1 << i))
set_bit(pos);
pos++;
}
pos += 21; /* 20 exec cycles + 1 transition cycle */
}
uint8_t user1[] = {0x02};
_jtag->shiftIR(user1, NULL, _irlen);
_jtag->shiftDR(tx.data(), NULL, total_bits);
_jtag->flush();
}
2026-02-09 16:28:42 +01:00
bool BPIFlash::detect()
{
printInfo("Detecting BPI flash...");
/* Issue Read ID command to flash: write 0x0090 to any address */
bpi_write(0, FLASH_CMD_READ_ID);
/* Small delay for command to take effect */
usleep(1000);
/* Read manufacturer ID at offset 0x00 */
_manufacturer_id = bpi_read(0x00);
/* Read device ID at offset 0x01 */
_device_id = bpi_read(0x01);
/* Return to read array mode */
bpi_write(0, FLASH_CMD_READ_ARRAY);
usleep(1000);
if (_verbose) {
char buf[64];
snprintf(buf, sizeof(buf), "Raw Manufacturer ID: 0x%04x", _manufacturer_id);
printInfo(buf);
snprintf(buf, sizeof(buf), "Raw Device ID: 0x%04x", _device_id);
printInfo(buf);
}
if (_manufacturer_id == 0x0089 || _manufacturer_id == 0x8900) {
printInfo("Intel/Micron flash detected");
} else if (_manufacturer_id == 0x0020 || _manufacturer_id == 0x2000) {
printInfo("Micron flash detected");
} else if (_manufacturer_id == 0xFFFF || _manufacturer_id == 0x0000) {
char buf[64];
snprintf(buf, sizeof(buf), "No BPI flash detected (ID: 0x%04x)", _manufacturer_id);
printError(buf);
return false;
} else {
char buf[64];
snprintf(buf, sizeof(buf), "Unknown manufacturer: 0x%04x", _manufacturer_id);
printWarn(buf);
}
{
char buf[64];
snprintf(buf, sizeof(buf), "Manufacturer ID: 0x%04x", _manufacturer_id);
printInfo(buf);
snprintf(buf, sizeof(buf), "Device ID: 0x%04x", _device_id);
printInfo(buf);
}
/* MT28GU512AAA = 512Mbit = 64MB */
_capacity = 64 * 1024 * 1024;
_block_size = 256 * 1024;
printInfo("Flash capacity: 64 MB (512 Mbit)");
2026-02-09 17:58:24 +01:00
/* Enable burst write — assumes v02.00+ JTAG bitstream is loaded.
* Future: could auto-detect via USER4 version readback.
*/
_has_burst = true;
2026-02-09 16:28:42 +01:00
return true;
}
bool BPIFlash::wait_ready(uint32_t timeout_ms)
{
uint32_t elapsed = 0;
const uint32_t poll_interval = 10;
/* Issue read status command */
bpi_write(0, FLASH_CMD_READ_STATUS);
usleep(100);
while (elapsed < timeout_ms) {
uint16_t status = bpi_read(0);
/* Intel CFI status register is 8 bits, upper byte undefined */
uint8_t sr = status & 0xFF;
if (sr & SR_READY) {
if (sr & (SR_ERASE_ERR | SR_PROG_ERR | SR_VPP_ERR)) {
char buf[64];
snprintf(buf, sizeof(buf), "BPI Flash error: status = 0x%02x", sr);
printError(buf);
bpi_write(0, FLASH_CMD_CLEAR_STATUS);
return false;
}
/* Return to read array mode */
bpi_write(0, FLASH_CMD_READ_ARRAY);
return true;
}
usleep(poll_interval * 1000);
elapsed += poll_interval;
}
printError("BPI Flash timeout");
return false;
}
bool BPIFlash::unlock_block(uint32_t word_addr)
{
bpi_write(word_addr, FLASH_CMD_UNLOCK_BLOCK);
usleep(100);
bpi_write(word_addr, FLASH_CMD_UNLOCK_CONF);
usleep(100);
return true;
}
bool BPIFlash::erase_block(uint32_t addr)
{
uint32_t word_addr = addr >> 1;
if (_verbose) {
char buf[64];
snprintf(buf, sizeof(buf), "Erasing block at 0x%06x", addr);
printInfo(buf);
}
unlock_block(word_addr);
/* Block erase command sequence */
bpi_write(word_addr, FLASH_CMD_BLOCK_ERASE);
usleep(100);
bpi_write(word_addr, FLASH_CMD_CONFIRM);
if (!wait_ready(30000)) {
printError("Block erase failed");
return false;
}
/* Verify erase by reading first few words */
if (_verbose) {
bpi_write(0, FLASH_CMD_READ_ARRAY);
usleep(100);
char buf[128];
snprintf(buf, sizeof(buf), "Verify erase at 0x%06x:", addr);
printInfo(buf);
for (int i = 0; i < 4; i++) {
uint16_t val = bpi_read(word_addr + i);
snprintf(buf, sizeof(buf), " [0x%06x] = 0x%04x %s",
addr + i*2, val, (val == 0xFFFF) ? "(OK)" : "(NOT ERASED!)");
printInfo(buf);
}
}
return true;
}
bool BPIFlash::bulk_erase()
{
printInfo("Bulk erasing BPI flash...");
uint32_t num_blocks = _capacity / _block_size;
ProgressBar progress("Erasing", num_blocks, 50, _verbose > 0);
for (uint32_t i = 0; i < num_blocks; i++) {
uint32_t block_addr = i * _block_size;
if (!erase_block(block_addr)) {
progress.fail();
return false;
}
progress.display(i + 1);
}
progress.done();
return true;
}
bool BPIFlash::read(uint8_t *data, uint32_t addr, uint32_t len)
{
if (_verbose)
printInfo("Reading " + std::to_string(len) + " bytes from 0x" +
std::to_string(addr));
/* Ensure read array mode */
bpi_write(0, FLASH_CMD_READ_ARRAY);
usleep(100);
ProgressBar progress("Reading", len, 50, _verbose > 0);
for (uint32_t i = 0; i < len; i += 2) {
uint32_t word_addr = (addr + i) >> 1;
uint16_t word = bpi_read(word_addr);
data[i] = reverseByte((word >> 8) & 0xFF);
if (i + 1 < len)
data[i + 1] = reverseByte(word & 0xFF);
if ((i & 0xFFF) == 0)
progress.display(i);
}
progress.done();
return true;
}
bool BPIFlash::write(uint32_t addr, const uint8_t *data, uint32_t len)
{
char buf[128];
snprintf(buf, sizeof(buf), "Writing %u bytes to BPI flash at 0x%06x", len, addr);
printInfo(buf);
/* Calculate blocks to erase */
uint32_t start_block = addr / _block_size;
uint32_t end_block = (addr + len - 1) / _block_size;
/* Erase required blocks */
printInfo("Erasing " + std::to_string(end_block - start_block + 1) +
" blocks...");
for (uint32_t block = start_block; block <= end_block; block++) {
if (!erase_block(block * _block_size)) {
return false;
}
}
/* Program data using buffered programming (0x00E9)
* Sequence: Setup(0xE9) -> WordCount(N-1) -> N data words -> Confirm(0xD0)
*/
printInfo("Programming (buffered mode)...");
ProgressBar progress("Writing", len, 50, _verbose > 0);
/* MT28GU512AAA has 1KB programming regions - must program entire region at once
* to avoid object mode issues. Buffer size = 512 words = 1KB = one programming region.
*/
const uint32_t BUFFER_WORDS = 512; /* 512 words = 1KB = one programming region */
const uint32_t BUFFER_BYTES = BUFFER_WORDS * 2;
uint32_t last_block = 0xFFFFFFFF; /* Track which block is unlocked */
uint32_t offset = 0;
while (offset < len) {
uint32_t byte_addr = addr + offset;
uint32_t word_addr = byte_addr >> 1;
/* Calculate block address (word address of block start) */
uint32_t block_word_addr = (byte_addr / _block_size) * (_block_size >> 1);
/* Unlock only when entering a new block */
uint32_t current_block = byte_addr / _block_size;
if (current_block != last_block) {
unlock_block(block_word_addr);
last_block = current_block;
}
/* Calculate how many words to write in this buffer */
uint32_t remaining_bytes = len - offset;
uint32_t chunk_bytes = (remaining_bytes > BUFFER_BYTES) ? BUFFER_BYTES : remaining_bytes;
uint32_t chunk_words = (chunk_bytes + 1) / 2;
/* Don't cross block boundaries */
uint32_t bytes_to_block_end = _block_size - (byte_addr % _block_size);
if (chunk_bytes > bytes_to_block_end)
chunk_bytes = bytes_to_block_end;
chunk_words = (chunk_bytes + 1) / 2;
if (_verbose > 1) {
char buf[128];
snprintf(buf, sizeof(buf), "Buffered write: addr=0x%06x, words=%u, block=0x%06x",
byte_addr, chunk_words, block_word_addr << 1);
printInfo(buf);
}
/* Write data words for BPI x16 boot.
* Two transformations (same as Vivado write_cfgmem -interface BPIx16):
* 1. Bit reversal within each byte: FPGA D00=MSBit, flash DQ[0]=LSBit
* 2. Byte swap: first bitstream byte upper flash byte D[15:8]
*/
2026-02-09 17:58:24 +01:00
std::vector<uint16_t> word_buf(chunk_words);
2026-02-09 16:28:42 +01:00
for (uint32_t w = 0; w < chunk_words; w++) {
uint32_t data_offset = offset + w * 2;
uint8_t b0 = data[data_offset];
uint8_t b1 = 0xFF; /* pad with 0xFF if odd length */
if (data_offset + 1 < len)
b1 = data[data_offset + 1];
2026-02-09 17:58:24 +01:00
word_buf[w] = (reverseByte(b0) << 8) | reverseByte(b1);
}
/* Buffered Program Setup - sent to block/colony base address */
bpi_write(0, FLASH_CMD_CLEAR_STATUS);
bpi_write(block_word_addr, FLASH_CMD_BUFFERED_PRG);
bpi_write(block_word_addr, chunk_words - 1);
if (_has_burst) {
bpi_burst_write(word_addr, word_buf.data(), chunk_words);
} else {
/* Software-only fallback: one IR, no per-word flush */
uint8_t user1[] = {0x02};
_jtag->shiftIR(user1, NULL, _irlen);
for (uint32_t w = 0; w < chunk_words; w++) {
bpi_write_no_flush(word_addr + w, word_buf[w]);
}
_jtag->flush();
2026-02-09 16:28:42 +01:00
}
/* Confirm - sent to block address */
bpi_write(block_word_addr, FLASH_CMD_CONFIRM);
/* Wait for program to complete */
if (!wait_ready(5000)) {
char buf[64];
snprintf(buf, sizeof(buf), "Buffered program failed at address 0x%06x", byte_addr);
printError(buf);
progress.fail();
return false;
}
offset += chunk_words * 2;
if ((offset & 0xFFF) == 0 || offset >= len)
progress.display(offset);
}
/* Return to read array mode */
bpi_write(0, FLASH_CMD_READ_ARRAY);
progress.done();
/* Verify first 32 words */
printInfo("Verifying first 32 words...");
usleep(1000);
bpi_write(0, FLASH_CMD_READ_ARRAY);
usleep(100);
bool verify_ok = true;
for (uint32_t i = 0; i < 64 && i < len; i += 2) {
uint8_t b0 = data[i];
uint8_t b1 = 0xFF;
if (i + 1 < len)
b1 = data[i + 1];
uint16_t expected = (reverseByte(b0) << 8) | reverseByte(b1);
uint16_t actual = bpi_read(i >> 1);
if (actual != expected) {
char buf[128];
snprintf(buf, sizeof(buf), "Verify FAIL at 0x%04x: expected 0x%04x, got 0x%04x",
i, expected, actual);
printError(buf);
verify_ok = false;
} else if (_verbose) {
char buf[128];
snprintf(buf, sizeof(buf), "Verify OK at 0x%04x: 0x%04x", i, actual);
printInfo(buf);
}
}
if (verify_ok) {
printInfo("Verification passed for first 32 words");
} else {
printError("Verification FAILED!");
}
printInfo("BPI flash programming complete");
return true;
}