catalyst-n1 / rtl /scalable_core_v2.v
mrwabbit's picture
Initial upload: Catalyst N1 open source neuromorphic processor RTL
e4cdd5f verified
// ============================================================================
// Scalable Core V2
// ============================================================================
//
// Copyright 2026 Henry Arthur Shulayev Barnes / Catalyst Neuromorphic Ltd
// Company No. 17054540 — UK Patent Application No. 2602902.6
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ============================================================================
module scalable_core_v2 #(
parameter NUM_NEURONS = 1024,
parameter NEURON_BITS = 10,
parameter DATA_WIDTH = 16,
parameter POOL_DEPTH = 131072,
parameter POOL_ADDR_BITS = 17,
parameter COUNT_BITS = 12,
parameter REV_FANIN = 32,
parameter REV_SLOT_BITS = 5,
parameter THRESHOLD = 16'sd1000,
parameter LEAK_RATE = 16'sd3,
parameter RESTING_POT = 16'sd0,
parameter REFRAC_CYCLES = 4,
parameter TRACE_MAX = 8'd100,
parameter TRACE_DECAY = 8'd3,
parameter LEARN_SHIFT = 3,
parameter GRADE_SHIFT = 7,
parameter COMPARTMENT_BITS = 2,
parameter signed [DATA_WIDTH-1:0] DEND_THRESHOLD = 16'sd0,
parameter signed [DATA_WIDTH-1:0] WEIGHT_MAX = 16'sd2000,
parameter signed [DATA_WIDTH-1:0] WEIGHT_MIN = 16'sd0,
parameter REWARD_SHIFT = 7,
parameter ELIG_DECAY_SHIFT = 3,
parameter signed [DATA_WIDTH-1:0] ELIG_MAX = 16'sd1000,
parameter [15:0] NOISE_LFSR_SEED = 16'hACE1,
parameter [3:0] TAU1_DEFAULT = 4'd3,
parameter [3:0] TAU2_DEFAULT = 4'd4,
parameter DELAY_BITS = 6,
parameter DELAY_ENTRIES_PER_TS = 64,
parameter DELAY_ENTRY_BITS = 6,
parameter NEURON_WIDTH = 24
)(
input wire clk,
input wire rst_n,
input wire start,
input wire learn_enable,
input wire graded_enable,
input wire dendritic_enable,
input wire threefactor_enable,
input wire noise_enable,
input wire skip_idle_enable,
input wire scale_u_enable,
input wire signed [DATA_WIDTH-1:0] reward_value,
input wire ext_valid,
input wire [NEURON_BITS-1:0] ext_neuron_id,
input wire signed [DATA_WIDTH-1:0] ext_current,
input wire pool_we,
input wire [POOL_ADDR_BITS-1:0] pool_addr_in,
input wire [NEURON_BITS-1:0] pool_src_in,
input wire [NEURON_BITS-1:0] pool_target_in,
input wire signed [DATA_WIDTH-1:0] pool_weight_in,
input wire [COMPARTMENT_BITS-1:0] pool_comp_in,
input wire index_we,
input wire [NEURON_BITS-1:0] index_neuron_in,
input wire [POOL_ADDR_BITS-1:0] index_base_in,
input wire [COUNT_BITS-1:0] index_count_in,
input wire [1:0] index_format_in,
input wire delay_we,
input wire [POOL_ADDR_BITS-1:0] delay_addr_in,
input wire [DELAY_BITS-1:0] delay_value_in,
input wire ucode_prog_we,
input wire [7:0] ucode_prog_addr,
input wire [31:0] ucode_prog_data,
input wire prog_param_we,
input wire [NEURON_BITS-1:0] prog_param_neuron,
input wire [4:0] prog_param_id,
input wire signed [DATA_WIDTH-1:0] prog_param_value,
input wire probe_read,
input wire [NEURON_BITS-1:0] probe_neuron,
input wire [4:0] probe_state_id,
input wire [POOL_ADDR_BITS-1:0] probe_pool_addr,
output reg signed [DATA_WIDTH-1:0] probe_data,
output reg probe_valid,
output reg timestep_done,
output reg spike_out_valid,
output reg [NEURON_BITS-1:0] spike_out_id,
output reg [7:0] spike_out_payload,
output wire [5:0] state_out,
output reg [31:0] total_spikes,
output reg [31:0] timestep_count,
output wire core_idle
);
localparam S_IDLE = 6'd0;
localparam S_DELIVER_POP = 6'd1;
localparam S_DELIVER_IDX_WAIT = 6'd2;
localparam S_DELIVER_IDX_READ = 6'd3;
localparam S_DELIVER_POOL_WAIT = 6'd4;
localparam S_DELIVER_ADDR = 6'd5;
localparam S_DELIVER_ACC_WAIT = 6'd6;
localparam S_DELIVER_ACC = 6'd7;
localparam S_DELIVER_NEXT = 6'd8;
localparam S_UPDATE_INIT = 6'd9;
localparam S_UPDATE_READ = 6'd10;
localparam S_UPDATE_CALC = 6'd11;
localparam S_UPDATE_WRITE = 6'd12;
localparam S_LEARN_MC_SCAN = 6'd13;
localparam S_LEARN_MC_IDX_WAIT = 6'd14;
localparam S_LEARN_MC_IDX_READ = 6'd15;
localparam S_LEARN_MC_SETUP = 6'd16;
localparam S_LEARN_MC_WAIT1 = 6'd17;
localparam S_LEARN_MC_LOAD = 6'd18;
localparam S_LEARN_MC_WAIT2 = 6'd19;
localparam S_LEARN_MC_REGLD = 6'd20;
localparam S_LEARN_MC_FETCH = 6'd21;
localparam S_DONE = 6'd22;
localparam S_LEARN_MC_EXEC = 6'd23;
localparam S_LEARN_MC_NEXT = 6'd24;
localparam S_ELIG_MC = 6'd25;
localparam S_DELAY_DRAIN_INIT = 6'd26;
localparam S_DELAY_DRAIN_QWAIT = 6'd27;
localparam S_DELAY_DRAIN_CAP = 6'd28;
localparam S_DELAY_DRAIN_AWAIT = 6'd29;
localparam S_DELAY_DRAIN_ACC = 6'd30;
localparam S_UPDATE_PARENT_ADDR = 6'd31;
localparam S_UPDATE_PARENT_WAIT = 6'd32;
localparam S_UPDATE_PARENT_ACC = 6'd33;
localparam S_DELIVER_AXTYPE = 6'd34;
function signed [NEURON_WIDTH-1:0] raz_div4096;
input signed [NEURON_WIDTH+11:0] product;
reg signed [NEURON_WIDTH-1:0] truncated;
reg has_frac;
begin
truncated = product[NEURON_WIDTH+11:12];
has_frac = |product[11:0];
if (has_frac)
raz_div4096 = truncated + (product[NEURON_WIDTH+11] ? -1 : 1);
else
raz_div4096 = truncated;
end
endfunction
reg [5:0] state;
assign state_out = state;
reg was_idle;
reg any_spike_this_ts;
assign core_idle = was_idle;
wire signed [DATA_WIDTH-1:0] probe_nrn_rdata;
wire [7:0] probe_ref_rdata;
wire signed [NEURON_WIDTH-1:0] probe_acc_rdata;
wire signed [DATA_WIDTH-1:0] probe_wt_rdata;
wire signed [DATA_WIDTH-1:0] probe_elig_rdata;
wire [7:0] probe_trace1_rdata;
wire [7:0] probe_trace2_rdata;
wire signed [DATA_WIDTH-1:0] probe_dend1_rdata;
wire signed [DATA_WIDTH-1:0] probe_dend2_rdata;
wire signed [DATA_WIDTH-1:0] probe_dend3_rdata;
reg [31:0] perf_spike_count;
reg [31:0] perf_active_cycles;
reg [31:0] perf_synaptic_ops;
wire [31:0] perf_power_estimate = (perf_spike_count << 3) +
(perf_synaptic_ops << 1) + perf_active_cycles;
reg trace_fifo_enable;
reg [31:0] trace_fifo_mem [0:63];
reg [6:0] trace_wr_ptr, trace_rd_ptr;
wire [6:0] trace_count_val = trace_wr_ptr - trace_rd_ptr;
wire trace_fifo_full = (trace_count_val >= 7'd64);
wire trace_fifo_empty = (trace_wr_ptr == trace_rd_ptr);
reg [31:0] trace_last_popped;
reg probe_active_r;
reg [4:0] probe_sid_r;
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin
probe_active_r <= 0;
probe_sid_r <= 0;
probe_valid <= 0;
probe_data <= 0;
end else begin
probe_active_r <= probe_read && (state == S_IDLE);
probe_sid_r <= probe_state_id;
probe_valid <= probe_active_r;
if (probe_active_r) begin
case (probe_sid_r)
5'd0: probe_data <= probe_nrn_rdata;
5'd1: probe_data <= param_thr_rdata[DATA_WIDTH-1:0];
5'd2: probe_data <= {{(DATA_WIDTH-8){1'b0}}, probe_trace1_rdata};
5'd3: probe_data <= {{(DATA_WIDTH-8){1'b0}}, probe_trace2_rdata};
5'd4: probe_data <= {{(DATA_WIDTH-4){1'b0}}, probe_ref_rdata};
5'd5: probe_data <= probe_acc_rdata[DATA_WIDTH-1:0];
5'd6: probe_data <= probe_dend1_rdata;
5'd7: probe_data <= probe_dend2_rdata;
5'd8: probe_data <= probe_dend3_rdata;
5'd9: probe_data <= param_leak_rdata;
5'd10: probe_data <= param_rest_rdata;
5'd11: probe_data <= probe_wt_rdata;
5'd12: probe_data <= probe_elig_rdata;
5'd13: probe_data <= probe_cur_full[DATA_WIDTH-1:0];
5'd14: probe_data <= perf_spike_count[15:0];
5'd15: probe_data <= perf_spike_count[31:16];
5'd16: probe_data <= perf_active_cycles[15:0];
5'd17: probe_data <= perf_active_cycles[31:16];
5'd18: probe_data <= perf_synaptic_ops[15:0];
5'd19: probe_data <= perf_synaptic_ops[31:16];
5'd20: probe_data <= perf_power_estimate[15:0];
5'd21: probe_data <= perf_power_estimate[31:16];
5'd22: probe_data <= trace_fifo_empty ? 16'hFFFF :
trace_fifo_mem[trace_rd_ptr[5:0]][15:0];
5'd23: probe_data <= trace_last_popped[31:16];
5'd24: probe_data <= {9'd0, trace_count_val};
default: probe_data <= 16'sd0;
endcase
end
end
end
reg nrn_we;
reg [NEURON_BITS-1:0] nrn_addr;
reg signed [NEURON_WIDTH-1:0] nrn_wdata;
wire signed [NEURON_WIDTH-1:0] nrn_rdata;
wire signed [NEURON_WIDTH-1:0] probe_nrn_full;
sram #(.DATA_WIDTH(NEURON_WIDTH), .ADDR_WIDTH(NEURON_BITS)) neuron_mem (
.clk(clk), .we_a(nrn_we), .addr_a(nrn_addr),
.wdata_a(nrn_wdata), .rdata_a(nrn_rdata),
.addr_b(probe_neuron), .rdata_b(probe_nrn_full)
);
assign probe_nrn_rdata = probe_nrn_full[DATA_WIDTH-1:0];
reg cur_we;
reg [NEURON_BITS-1:0] cur_addr;
reg signed [NEURON_WIDTH-1:0] cur_wdata;
wire signed [NEURON_WIDTH-1:0] cur_rdata;
wire signed [NEURON_WIDTH-1:0] probe_cur_full;
sram #(.DATA_WIDTH(NEURON_WIDTH), .ADDR_WIDTH(NEURON_BITS)) current_mem (
.clk(clk), .we_a(cur_we), .addr_a(cur_addr),
.wdata_a(cur_wdata), .rdata_a(cur_rdata),
.addr_b(probe_neuron), .rdata_b(probe_cur_full)
);
reg ref_we;
reg [NEURON_BITS-1:0] ref_addr;
reg [7:0] ref_wdata;
wire [7:0] ref_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) refrac_mem (
.clk(clk), .we_a(ref_we), .addr_a(ref_addr),
.wdata_a(ref_wdata), .rdata_a(ref_rdata),
.addr_b(probe_neuron), .rdata_b(probe_ref_rdata)
);
reg acc_we;
reg [NEURON_BITS-1:0] acc_addr;
reg signed [NEURON_WIDTH-1:0] acc_wdata;
wire signed [NEURON_WIDTH-1:0] acc_rdata;
sram #(.DATA_WIDTH(NEURON_WIDTH), .ADDR_WIDTH(NEURON_BITS)) acc_mem (
.clk(clk), .we_a(acc_we), .addr_a(acc_addr),
.wdata_a(acc_wdata), .rdata_a(acc_rdata),
.addr_b(probe_neuron), .rdata_b(probe_acc_rdata)
);
localparam INDEX_WIDTH = 2 + POOL_ADDR_BITS + COUNT_BITS;
localparam FMT_SPARSE = 2'd0;
localparam FMT_DENSE = 2'd1;
localparam FMT_POP = 2'd2;
reg [NEURON_BITS-1:0] index_rd_addr;
wire [INDEX_WIDTH-1:0] index_rdata;
wire index_we_mux = (state == S_IDLE) ? index_we : 1'b0;
wire [NEURON_BITS-1:0] index_addr_mux = (state == S_IDLE) ? index_neuron_in : index_rd_addr;
wire [INDEX_WIDTH-1:0] index_wdata_mux = {index_format_in, index_base_in, index_count_in};
sram #(.DATA_WIDTH(INDEX_WIDTH), .ADDR_WIDTH(NEURON_BITS)) index_mem (
.clk(clk), .we_a(index_we_mux), .addr_a(index_addr_mux),
.wdata_a(index_wdata_mux), .rdata_a(index_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
reg [POOL_ADDR_BITS-1:0] pool_addr_r;
wire [NEURON_BITS-1:0] pool_tgt_rdata;
wire pool_tgt_we_mux = (state == S_IDLE) ? pool_we : 1'b0;
wire [POOL_ADDR_BITS-1:0] pool_tgt_addr_mux = (state == S_IDLE) ? pool_addr_in : pool_addr_r;
sram #(.DATA_WIDTH(NEURON_BITS), .ADDR_WIDTH(POOL_ADDR_BITS)) pool_target_mem (
.clk(clk), .we_a(pool_tgt_we_mux), .addr_a(pool_tgt_addr_mux),
.wdata_a((state == S_IDLE) ? pool_target_in : {NEURON_BITS{1'b0}}),
.rdata_a(pool_tgt_rdata),
.addr_b({POOL_ADDR_BITS{1'b0}}), .rdata_b()
);
reg pool_wt_we_r;
reg [POOL_ADDR_BITS-1:0] pool_wt_wr_addr;
reg signed [DATA_WIDTH-1:0] pool_wt_wr_data;
wire signed [DATA_WIDTH-1:0] pool_wt_rdata;
wire pool_wt_we_mux = (state == S_IDLE) ? pool_we : pool_wt_we_r;
wire [POOL_ADDR_BITS-1:0] pool_wt_addr_mux = (state == S_IDLE) ? pool_addr_in :
(pool_wt_we_r ? pool_wt_wr_addr : pool_addr_r);
wire signed [DATA_WIDTH-1:0] pool_wt_wdata_mux = (state == S_IDLE) ? pool_weight_in : pool_wt_wr_data;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(POOL_ADDR_BITS)) pool_weight_mem (
.clk(clk), .we_a(pool_wt_we_mux), .addr_a(pool_wt_addr_mux),
.wdata_a(pool_wt_wdata_mux), .rdata_a(pool_wt_rdata),
.addr_b(probe_pool_addr), .rdata_b(probe_wt_rdata)
);
wire [COMPARTMENT_BITS-1:0] pool_comp_rdata;
wire pool_comp_we_mux = (state == S_IDLE) ? pool_we : 1'b0;
wire [POOL_ADDR_BITS-1:0] pool_comp_addr_mux = (state == S_IDLE) ? pool_addr_in : pool_addr_r;
sram #(.DATA_WIDTH(COMPARTMENT_BITS), .ADDR_WIDTH(POOL_ADDR_BITS)) pool_comp_mem (
.clk(clk), .we_a(pool_comp_we_mux), .addr_a(pool_comp_addr_mux),
.wdata_a((state == S_IDLE) ? pool_comp_in : {COMPARTMENT_BITS{1'b0}}),
.rdata_a(pool_comp_rdata),
.addr_b({POOL_ADDR_BITS{1'b0}}), .rdata_b()
);
reg elig_we;
reg [POOL_ADDR_BITS-1:0] elig_addr;
reg signed [DATA_WIDTH-1:0] elig_wdata;
wire signed [DATA_WIDTH-1:0] elig_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(POOL_ADDR_BITS)) elig_mem (
.clk(clk), .we_a(elig_we), .addr_a(elig_addr),
.wdata_a(elig_wdata), .rdata_a(elig_rdata),
.addr_b(probe_pool_addr), .rdata_b(probe_elig_rdata)
);
localparam UCODE_DEPTH = 256;
localparam UCODE_ADDR_BITS = 8;
localparam UCODE_WIDTH = 32;
localparam [15:0] MC_WEIGHT_MIN = WEIGHT_MIN;
localparam [15:0] MC_WEIGHT_MAX = WEIGHT_MAX;
localparam [15:0] MC_ELIG_MAX = ELIG_MAX;
localparam [15:0] MC_NEG_ELIG_MAX = -ELIG_MAX;
reg [UCODE_ADDR_BITS-1:0] mc_pc;
wire [UCODE_WIDTH-1:0] ucode_rdata;
wire mc_ucode_we = (state == S_IDLE) ? ucode_prog_we : 1'b0;
wire [UCODE_ADDR_BITS-1:0] mc_ucode_addr = (state == S_IDLE) ? ucode_prog_addr : mc_pc;
sram #(.DATA_WIDTH(UCODE_WIDTH), .ADDR_WIDTH(UCODE_ADDR_BITS)) ucode_mem (
.clk(clk), .we_a(mc_ucode_we), .addr_a(mc_ucode_addr),
.wdata_a(ucode_prog_data), .rdata_a(ucode_rdata),
.addr_b({UCODE_ADDR_BITS{1'b0}}), .rdata_b()
);
reg signed [DATA_WIDTH-1:0] mc_regs [0:15];
reg [1:0] elig_phase;
localparam DELAY_QUEUE_ADDR_W = DELAY_BITS + DELAY_ENTRY_BITS;
localparam DELAY_QUEUE_ENTRY_W = NEURON_BITS + DATA_WIDTH + COMPARTMENT_BITS;
wire [DELAY_BITS-1:0] pool_delay_rdata;
reg pool_delay_we_learn;
reg [POOL_ADDR_BITS-1:0] pool_delay_learn_addr;
reg [5:0] pool_delay_learn_data;
wire pool_delay_we_mux = (state == S_IDLE) ? delay_we : pool_delay_we_learn;
wire [POOL_ADDR_BITS-1:0] pool_delay_addr_mux = (state == S_IDLE) ? delay_addr_in :
(pool_delay_we_learn ? pool_delay_learn_addr : pool_addr_r);
wire [5:0] pool_delay_wdata_mux = (state == S_IDLE) ? delay_value_in : pool_delay_learn_data;
sram #(.DATA_WIDTH(DELAY_BITS), .ADDR_WIDTH(POOL_ADDR_BITS)) pool_delay_mem (
.clk(clk), .we_a(pool_delay_we_mux), .addr_a(pool_delay_addr_mux),
.wdata_a(pool_delay_wdata_mux),
.rdata_a(pool_delay_rdata),
.addr_b({POOL_ADDR_BITS{1'b0}}), .rdata_b()
);
wire signed [DATA_WIDTH-1:0] pool_tag_rdata;
reg pool_tag_we_r;
reg [POOL_ADDR_BITS-1:0] pool_tag_wr_addr;
reg signed [DATA_WIDTH-1:0] pool_tag_wr_data;
wire pool_tag_we_mux = (state == S_IDLE) ? 1'b0 : pool_tag_we_r;
wire [POOL_ADDR_BITS-1:0] pool_tag_addr_mux = (state == S_IDLE) ? pool_addr_r :
(pool_tag_we_r ? pool_tag_wr_addr : pool_addr_r);
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(POOL_ADDR_BITS)) pool_tag_mem (
.clk(clk), .we_a(pool_tag_we_mux), .addr_a(pool_tag_addr_mux),
.wdata_a(pool_tag_wr_data), .rdata_a(pool_tag_rdata),
.addr_b({POOL_ADDR_BITS{1'b0}}), .rdata_b()
);
reg dq_we;
reg [DELAY_QUEUE_ADDR_W-1:0] dq_addr;
reg [DELAY_QUEUE_ENTRY_W-1:0] dq_wdata;
wire [DELAY_QUEUE_ENTRY_W-1:0] dq_rdata;
sram #(.DATA_WIDTH(DELAY_QUEUE_ENTRY_W), .ADDR_WIDTH(DELAY_QUEUE_ADDR_W)) delay_queue_mem (
.clk(clk), .we_a(dq_we), .addr_a(dq_addr),
.wdata_a(dq_wdata), .rdata_a(dq_rdata),
.addr_b({DELAY_QUEUE_ADDR_W{1'b0}}), .rdata_b()
);
reg [DELAY_ENTRY_BITS:0] delay_count [0:(1 << DELAY_BITS)-1];
reg [DELAY_BITS-1:0] current_ts_mod64;
reg [DELAY_ENTRY_BITS:0] drain_cnt;
reg [DELAY_ENTRY_BITS-1:0] drain_idx;
reg [NEURON_BITS-1:0] dq_cap_target;
reg signed [DATA_WIDTH-1:0] dq_cap_current;
reg [COMPARTMENT_BITS-1:0] dq_cap_comp;
wire [DELAY_BITS-1:0] delivery_ts = current_ts_mod64 + pool_delay_rdata;
wire signed [DATA_WIDTH-1:0] delivered_current = graded_enable ? graded_current : saved_weight;
integer dci;
initial begin
for (dci = 0; dci < (1 << DELAY_BITS); dci = dci + 1)
delay_count[dci] = 0;
end
reg dend_acc_1_we;
reg [NEURON_BITS-1:0] dend_acc_1_addr;
reg signed [DATA_WIDTH-1:0] dend_acc_1_wdata;
wire signed [DATA_WIDTH-1:0] dend_acc_1_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) dend_acc_1_mem (
.clk(clk), .we_a(dend_acc_1_we), .addr_a(dend_acc_1_addr),
.wdata_a(dend_acc_1_wdata), .rdata_a(dend_acc_1_rdata),
.addr_b(probe_neuron), .rdata_b(probe_dend1_rdata)
);
reg dend_acc_2_we;
reg [NEURON_BITS-1:0] dend_acc_2_addr;
reg signed [DATA_WIDTH-1:0] dend_acc_2_wdata;
wire signed [DATA_WIDTH-1:0] dend_acc_2_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) dend_acc_2_mem (
.clk(clk), .we_a(dend_acc_2_we), .addr_a(dend_acc_2_addr),
.wdata_a(dend_acc_2_wdata), .rdata_a(dend_acc_2_rdata),
.addr_b(probe_neuron), .rdata_b(probe_dend2_rdata)
);
reg dend_acc_3_we;
reg [NEURON_BITS-1:0] dend_acc_3_addr;
reg signed [DATA_WIDTH-1:0] dend_acc_3_wdata;
wire signed [DATA_WIDTH-1:0] dend_acc_3_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) dend_acc_3_mem (
.clk(clk), .we_a(dend_acc_3_we), .addr_a(dend_acc_3_addr),
.wdata_a(dend_acc_3_wdata), .rdata_a(dend_acc_3_rdata),
.addr_b(probe_neuron), .rdata_b(probe_dend3_rdata)
);
reg trace_we;
reg [NEURON_BITS-1:0] trace_addr;
reg [7:0] trace_wdata;
wire [7:0] trace_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) trace_mem (
.clk(clk), .we_a(trace_we), .addr_a(trace_addr),
.wdata_a(trace_wdata), .rdata_a(trace_rdata),
.addr_b(probe_neuron), .rdata_b(probe_trace1_rdata)
);
reg trace2_we;
reg [NEURON_BITS-1:0] trace2_addr;
reg [7:0] trace2_wdata;
wire [7:0] trace2_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) trace2_mem (
.clk(clk), .we_a(trace2_we), .addr_a(trace2_addr),
.wdata_a(trace2_wdata), .rdata_a(trace2_rdata),
.addr_b(probe_neuron), .rdata_b(probe_trace2_rdata)
);
reg x2_trace_we;
reg [NEURON_BITS-1:0] x2_trace_addr;
reg [7:0] x2_trace_wdata;
wire [7:0] x2_trace_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) x2_trace_mem (
.clk(clk), .we_a(x2_trace_we), .addr_a(x2_trace_addr),
.wdata_a(x2_trace_wdata), .rdata_a(x2_trace_rdata),
.addr_b(probe_neuron), .rdata_b()
);
reg y2_trace_we;
reg [NEURON_BITS-1:0] y2_trace_addr;
reg [7:0] y2_trace_wdata;
wire [7:0] y2_trace_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) y2_trace_mem (
.clk(clk), .we_a(y2_trace_we), .addr_a(y2_trace_addr),
.wdata_a(y2_trace_wdata), .rdata_a(y2_trace_rdata),
.addr_b(probe_neuron), .rdata_b()
);
reg y3_trace_we;
reg [NEURON_BITS-1:0] y3_trace_addr;
reg [7:0] y3_trace_wdata;
wire [7:0] y3_trace_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) y3_trace_mem (
.clk(clk), .we_a(y3_trace_we), .addr_a(y3_trace_addr),
.wdata_a(y3_trace_wdata), .rdata_a(y3_trace_rdata),
.addr_b(probe_neuron), .rdata_b()
);
localparam REV_DATA_W = 1 + NEURON_BITS + POOL_ADDR_BITS;
localparam REV_ADDR_W = NEURON_BITS + REV_SLOT_BITS;
reg [REV_ADDR_W-1:0] rev_addr;
wire [REV_DATA_W-1:0] rev_rdata;
reg [REV_SLOT_BITS-1:0] rev_count [0:NUM_NEURONS-1];
wire rev_we_mux = (state == S_IDLE) ? pool_we : 1'b0;
wire [REV_ADDR_W-1:0] rev_addr_mux = (state == S_IDLE) ?
{pool_target_in, rev_count[pool_target_in]} : rev_addr;
wire [REV_DATA_W-1:0] rev_wdata_mux = (state == S_IDLE) ?
{1'b1, pool_src_in, pool_addr_in} : {REV_DATA_W{1'b0}};
sram #(.DATA_WIDTH(REV_DATA_W), .ADDR_WIDTH(REV_ADDR_W)) rev_conn_mem (
.clk(clk), .we_a(rev_we_mux), .addr_a(rev_addr_mux),
.wdata_a(rev_wdata_mux), .rdata_a(rev_rdata),
.addr_b({REV_ADDR_W{1'b0}}), .rdata_b()
);
integer rci;
initial begin
for (rci = 0; rci < NUM_NEURONS; rci = rci + 1)
rev_count[rci] = 0;
end
wire [NEURON_BITS-1:0] param_sram_addr =
(state == S_IDLE) ? prog_param_neuron : proc_neuron[NEURON_BITS-1:0];
wire param_thr_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd0);
reg homeo_thr_we;
reg signed [NEURON_WIDTH-1:0] homeo_thr_wdata;
wire thr_we_final = param_thr_we || homeo_thr_we;
wire signed [NEURON_WIDTH-1:0] thr_wdata_final = homeo_thr_we ? homeo_thr_wdata : $signed(prog_param_value);
wire signed [NEURON_WIDTH-1:0] param_thr_rdata;
sram #(.DATA_WIDTH(NEURON_WIDTH), .ADDR_WIDTH(NEURON_BITS)) threshold_mem (
.clk(clk), .we_a(thr_we_final), .addr_a(param_sram_addr),
.wdata_a(thr_wdata_final), .rdata_a(param_thr_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_leak_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd1);
wire signed [DATA_WIDTH-1:0] param_leak_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) leak_mem (
.clk(clk), .we_a(param_leak_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value), .rdata_a(param_leak_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_rest_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd2);
wire signed [DATA_WIDTH-1:0] param_rest_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) rest_mem (
.clk(clk), .we_a(param_rest_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value), .rdata_a(param_rest_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_refrac_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd3);
wire [15:0] param_refrac_rdata;
sram #(.DATA_WIDTH(16), .ADDR_WIDTH(NEURON_BITS)) refrac_cfg_mem (
.clk(clk), .we_a(param_refrac_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[15:0]), .rdata_a(param_refrac_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire refrac_mode_abs = param_refrac_rdata[8];
wire refrac_mode_rel = param_refrac_rdata[9];
wire param_dend_thr_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd4);
wire signed [DATA_WIDTH-1:0] param_dend_thr_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) dend_thr_mem (
.clk(clk), .we_a(param_dend_thr_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value), .rdata_a(param_dend_thr_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_noise_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd5);
wire [11:0] param_noise_rdata;
sram #(.DATA_WIDTH(12), .ADDR_WIDTH(NEURON_BITS)) noise_cfg_mem (
.clk(clk), .we_a(param_noise_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[11:0]), .rdata_a(param_noise_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_noise_target_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd29);
wire [1:0] param_noise_target_rdata;
sram #(.DATA_WIDTH(2), .ADDR_WIDTH(NEURON_BITS)) noise_target_mem (
.clk(clk), .we_a(param_noise_target_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[1:0]), .rdata_a(param_noise_target_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_vmin_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd30);
wire signed [DATA_WIDTH-1:0] param_vmin_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) vmin_mem (
.clk(clk), .we_a(param_vmin_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value), .rdata_a(param_vmin_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_vmax_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd31);
wire signed [DATA_WIDTH-1:0] param_vmax_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) vmax_mem (
.clk(clk), .we_a(param_vmax_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value), .rdata_a(param_vmax_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_tau1_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd6);
wire [3:0] param_tau1_rdata;
sram #(.DATA_WIDTH(4), .ADDR_WIDTH(NEURON_BITS)) tau1_cfg_mem (
.clk(clk), .we_a(param_tau1_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[3:0]), .rdata_a(param_tau1_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_tau2_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd7);
wire [3:0] param_tau2_rdata;
sram #(.DATA_WIDTH(4), .ADDR_WIDTH(NEURON_BITS)) tau2_cfg_mem (
.clk(clk), .we_a(param_tau2_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[3:0]), .rdata_a(param_tau2_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_tau_x2_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd19);
wire [3:0] param_tau_x2_rdata;
sram #(.DATA_WIDTH(4), .ADDR_WIDTH(NEURON_BITS)) tau_x2_cfg_mem (
.clk(clk), .we_a(param_tau_x2_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[3:0]), .rdata_a(param_tau_x2_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_tau_y2_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd20);
wire [3:0] param_tau_y2_rdata;
sram #(.DATA_WIDTH(4), .ADDR_WIDTH(NEURON_BITS)) tau_y2_cfg_mem (
.clk(clk), .we_a(param_tau_y2_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[3:0]), .rdata_a(param_tau_y2_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_tau_y3_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd21);
wire [3:0] param_tau_y3_rdata;
sram #(.DATA_WIDTH(4), .ADDR_WIDTH(NEURON_BITS)) tau_y3_cfg_mem (
.clk(clk), .we_a(param_tau_y3_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[3:0]), .rdata_a(param_tau_y3_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_dend_thr1_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd8);
wire signed [DATA_WIDTH-1:0] dend_thr_1_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) dend_thr_1_mem (
.clk(clk), .we_a(param_dend_thr1_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value), .rdata_a(dend_thr_1_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_dend_thr2_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd9);
wire signed [DATA_WIDTH-1:0] dend_thr_2_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) dend_thr_2_mem (
.clk(clk), .we_a(param_dend_thr2_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value), .rdata_a(dend_thr_2_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_dend_thr3_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd10);
wire signed [DATA_WIDTH-1:0] dend_thr_3_rdata;
sram #(.DATA_WIDTH(DATA_WIDTH), .ADDR_WIDTH(NEURON_BITS)) dend_thr_3_mem (
.clk(clk), .we_a(param_dend_thr3_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value), .rdata_a(dend_thr_3_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_dend_parent_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd15);
wire [5:0] dend_parent_rdata;
sram #(.DATA_WIDTH(6), .ADDR_WIDTH(NEURON_BITS)) dend_parent_mem (
.clk(clk), .we_a(param_dend_parent_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[5:0]), .rdata_a(dend_parent_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire parent_ptr_param_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd22);
wire [NEURON_BITS-1:0] parent_ptr_rdata;
sram #(.DATA_WIDTH(NEURON_BITS), .ADDR_WIDTH(NEURON_BITS),
.INIT_VALUE({NEURON_BITS{1'b1}})) parent_ptr_mem (
.clk(clk), .we_a(parent_ptr_param_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[NEURON_BITS-1:0]), .rdata_a(parent_ptr_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire joinop_param_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd23);
wire [3:0] joinop_full_rdata;
wire [1:0] joinop_rdata = joinop_full_rdata[1:0];
wire [1:0] stackout_mode = joinop_full_rdata[3:2];
sram #(.DATA_WIDTH(4), .ADDR_WIDTH(NEURON_BITS)) joinop_mem (
.clk(clk), .we_a(joinop_param_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[3:0]), .rdata_a(joinop_full_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire is_root_param_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd24);
wire is_root_rdata;
sram #(.DATA_WIDTH(1), .ADDR_WIDTH(NEURON_BITS),
.INIT_VALUE(1'b1)) is_root_mem (
.clk(clk), .we_a(is_root_param_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[0]), .rdata_a(is_root_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire axon_type_param_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd25);
wire [4:0] axon_type_rdata;
reg [NEURON_BITS-1:0] axtype_rd_addr;
sram #(.DATA_WIDTH(5), .ADDR_WIDTH(NEURON_BITS)) axon_type_mem (
.clk(clk), .we_a(axon_type_param_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[4:0]), .rdata_a(),
.addr_b(axtype_rd_addr), .rdata_b(axon_type_rdata)
);
wire axon_cfg_param_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd26);
reg [11:0] axon_cfg_regs [0:31];
wire [11:0] axon_cfg_rdata = axon_cfg_regs[axon_type_rdata];
always @(posedge clk) begin
if (axon_cfg_param_we)
axon_cfg_regs[param_sram_addr[4:0]] <= prog_param_value[11:0];
end
wire param_trace_en_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd27);
wire param_perf_reset_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd28);
reg [7:0] epoch_interval;
reg [7:0] epoch_counter;
reg [3:0] num_updates;
reg [3:0] update_pass;
wire param_epoch_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd11);
reg signed [DATA_WIDTH-1:0] reward_trace;
reg [3:0] reward_tau;
wire param_reward_tau_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd12);
reg spike_ts_we;
reg [NEURON_BITS-1:0] spike_ts_addr;
reg [7:0] spike_ts_wdata;
wire [7:0] spike_ts_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) spike_ts_mem (
.clk(clk), .we_a(spike_ts_we), .addr_a(spike_ts_addr),
.wdata_a(spike_ts_wdata), .rdata_a(spike_ts_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
reg [7:0] timestep_within_epoch;
wire signed [DATA_WIDTH-1:0] rt_decay_raw = reward_trace >>> reward_tau;
wire signed [DATA_WIDTH-1:0] rt_decayed =
(reward_trace == 0) ? 16'sd0 :
(reward_trace > 0 && rt_decay_raw == 0) ? (reward_trace - 16'sd1) :
(reward_trace < 0 && rt_decay_raw == 0) ? (reward_trace + 16'sd1) :
(reward_trace - rt_decay_raw);
wire param_homeo_target_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd13);
wire [7:0] homeo_target_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) homeo_target_mem (
.clk(clk), .we_a(param_homeo_target_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[7:0]), .rdata_a(homeo_target_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_homeo_eta_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd14);
wire [7:0] homeo_eta_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) homeo_eta_mem (
.clk(clk), .we_a(param_homeo_eta_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[7:0]), .rdata_a(homeo_eta_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_decay_v_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd16);
wire [11:0] decay_v_rdata;
sram #(.DATA_WIDTH(12), .ADDR_WIDTH(NEURON_BITS)) decay_v_mem (
.clk(clk), .we_a(param_decay_v_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[11:0]), .rdata_a(decay_v_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_decay_u_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd17);
wire [11:0] decay_u_rdata;
sram #(.DATA_WIDTH(12), .ADDR_WIDTH(NEURON_BITS)) decay_u_mem (
.clk(clk), .we_a(param_decay_u_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[11:0]), .rdata_a(decay_u_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire param_bias_cfg_we = (state == S_IDLE) && prog_param_we && (prog_param_id == 5'd18);
wire [15:0] bias_cfg_rdata;
sram #(.DATA_WIDTH(16), .ADDR_WIDTH(NEURON_BITS)) bias_cfg_mem (
.clk(clk), .we_a(param_bias_cfg_we), .addr_a(param_sram_addr),
.wdata_a(prog_param_value[15:0]), .rdata_a(bias_cfg_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
wire signed [12:0] bias_mant = $signed(bias_cfg_rdata[15:3]);
wire [2:0] bias_exp = bias_cfg_rdata[2:0];
wire signed [NEURON_WIDTH-1:0] bias_scaled =
($signed({{(NEURON_WIDTH-13){bias_mant[12]}}, bias_mant}) << bias_exp);
wire cuba_enabled = (decay_v_rdata != 12'd0) || (decay_u_rdata != 12'd0) || (bias_cfg_rdata != 16'd0);
wire signed [NEURON_WIDTH+11:0] v_decay_product = nrn_rdata * $signed({1'b0, decay_v_rdata});
wire signed [NEURON_WIDTH-1:0] v_decay_step = (decay_v_rdata == 12'd0) ? {NEURON_WIDTH{1'b0}} :
raz_div4096(v_decay_product);
wire signed [NEURON_WIDTH+11:0] u_decay_product = cur_rdata * $signed({1'b0, decay_u_rdata});
wire signed [NEURON_WIDTH-1:0] u_decay_step = (decay_u_rdata == 12'd0) ? {NEURON_WIDTH{1'b0}} :
raz_div4096(u_decay_product);
reg spike_cnt_we;
reg [NEURON_BITS-1:0] spike_cnt_addr;
reg [7:0] spike_cnt_wdata;
wire [7:0] spike_cnt_rdata;
sram #(.DATA_WIDTH(8), .ADDR_WIDTH(NEURON_BITS)) spike_count_mem (
.clk(clk), .we_a(spike_cnt_we), .addr_a(spike_cnt_addr),
.wdata_a(spike_cnt_wdata), .rdata_a(spike_cnt_rdata),
.addr_b({NEURON_BITS{1'b0}}), .rdata_b()
);
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin
epoch_interval <= 8'd1;
reward_tau <= 4'd4;
num_updates <= 4'd1;
end else begin
if (param_epoch_we) begin
epoch_interval <= prog_param_value[7:0];
num_updates <= (prog_param_value[15:12] == 4'd0) ? 4'd1 : prog_param_value[15:12];
end
if (param_reward_tau_we) reward_tau <= prog_param_value[3:0];
end
end
reg [15:0] lfsr;
wire lfsr_feedback = lfsr[0];
wire [15:0] lfsr_next = {lfsr_feedback, lfsr[15:1]} ^
(lfsr_feedback ? 16'hB400 : 16'h0000);
wire [3:0] noise_mant = param_noise_rdata[3:0];
wire [4:0] noise_exp = param_noise_rdata[8:4];
wire [31:0] noise_mask_wide = ({28'b0, noise_mant} << noise_exp);
wire [DATA_WIDTH-1:0] noise_mask = (|noise_mask_wide[31:DATA_WIDTH]) ?
{DATA_WIDTH{1'b1}} : noise_mask_wide[DATA_WIDTH-1:0];
wire signed [DATA_WIDTH-1:0] noise_value =
$signed({1'b0, lfsr[DATA_WIDTH-2:0] & noise_mask[DATA_WIDTH-2:0]}) -
$signed({1'b0, noise_mask[DATA_WIDTH-1:1]});
wire signed [NEURON_WIDTH-1:0] effective_threshold =
(noise_enable && param_noise_target_rdata == 2'd0) ? (param_thr_rdata + $signed(noise_value)) : param_thr_rdata;
wire signed [NEURON_WIDTH-1:0] noise_v_offset =
(noise_enable && param_noise_target_rdata == 2'd1) ?
$signed({{(NEURON_WIDTH-DATA_WIDTH){noise_value[DATA_WIDTH-1]}}, noise_value}) : {NEURON_WIDTH{1'b0}};
wire signed [NEURON_WIDTH-1:0] noise_u_offset =
(noise_enable && param_noise_target_rdata == 2'd2) ?
$signed({{(NEURON_WIDTH-DATA_WIDTH){noise_value[DATA_WIDTH-1]}}, noise_value}) : {NEURON_WIDTH{1'b0}};
wire signed [NEURON_WIDTH-1:0] vmin_ext = $signed({{(NEURON_WIDTH-DATA_WIDTH){param_vmin_rdata[DATA_WIDTH-1]}}, param_vmin_rdata});
wire signed [NEURON_WIDTH-1:0] vmax_ext = $signed({{(NEURON_WIDTH-DATA_WIDTH){param_vmax_rdata[DATA_WIDTH-1]}}, param_vmax_rdata});
wire [7:0] tau1_mask = (param_tau1_rdata == 4'd0) ? 8'd0 : ((8'd1 << param_tau1_rdata) - 8'd1);
wire [7:0] trace1_frac = trace_rdata & tau1_mask;
wire trace1_stoch_up = (param_tau1_rdata != 4'd0) && (trace1_frac != 8'd0) &&
((lfsr[7:0] & tau1_mask) < trace1_frac);
wire [7:0] trace1_decay_step = (trace_rdata >> param_tau1_rdata) + {7'd0, trace1_stoch_up};
wire [7:0] trace1_decay_val = (trace_rdata == 8'd0) ? 8'd0 :
(trace1_decay_step == 8'd0) ? (trace_rdata - 8'd1) :
(trace_rdata - trace1_decay_step);
wire [7:0] tau2_mask = (param_tau2_rdata == 4'd0) ? 8'd0 : ((8'd1 << param_tau2_rdata) - 8'd1);
wire [7:0] trace2_frac = trace2_rdata & tau2_mask;
wire trace2_stoch_up = (param_tau2_rdata != 4'd0) && (trace2_frac != 8'd0) &&
((lfsr[15:8] & tau2_mask) < trace2_frac);
wire [7:0] trace2_decay_step = (trace2_rdata >> param_tau2_rdata) + {7'd0, trace2_stoch_up};
wire [7:0] trace2_decay_val = (trace2_rdata == 8'd0) ? 8'd0 :
(trace2_decay_step == 8'd0) ? (trace2_rdata - 8'd1) :
(trace2_rdata - trace2_decay_step);
wire [7:0] taux2_mask = (param_tau_x2_rdata == 4'd0) ? 8'd0 : ((8'd1 << param_tau_x2_rdata) - 8'd1);
wire [7:0] x2_frac = x2_trace_rdata & taux2_mask;
wire x2_stoch_up = (param_tau_x2_rdata != 4'd0) && (x2_frac != 8'd0) &&
((lfsr[7:0] ^ lfsr[15:8] & taux2_mask) < x2_frac);
wire [7:0] x2_decay_step = (x2_trace_rdata >> param_tau_x2_rdata) + {7'd0, x2_stoch_up};
wire [7:0] x2_decay_val = (x2_trace_rdata == 8'd0) ? 8'd0 :
(x2_decay_step == 8'd0) ? (x2_trace_rdata - 8'd1) :
(x2_trace_rdata - x2_decay_step);
wire [7:0] tauy2_mask = (param_tau_y2_rdata == 4'd0) ? 8'd0 : ((8'd1 << param_tau_y2_rdata) - 8'd1);
wire [7:0] y2_frac = y2_trace_rdata & tauy2_mask;
wire y2_stoch_up = (param_tau_y2_rdata != 4'd0) && (y2_frac != 8'd0) &&
({lfsr[3:0], lfsr[15:12]} & tauy2_mask) < y2_frac;
wire [7:0] y2_decay_step = (y2_trace_rdata >> param_tau_y2_rdata) + {7'd0, y2_stoch_up};
wire [7:0] y2_decay_val = (y2_trace_rdata == 8'd0) ? 8'd0 :
(y2_decay_step == 8'd0) ? (y2_trace_rdata - 8'd1) :
(y2_trace_rdata - y2_decay_step);
wire [7:0] tauy3_mask = (param_tau_y3_rdata == 4'd0) ? 8'd0 : ((8'd1 << param_tau_y3_rdata) - 8'd1);
wire [7:0] y3_frac = y3_trace_rdata & tauy3_mask;
wire y3_stoch_up = (param_tau_y3_rdata != 4'd0) && (y3_frac != 8'd0) &&
({lfsr[11:8], lfsr[7:4]} & tauy3_mask) < y3_frac;
wire [7:0] y3_decay_step = (y3_trace_rdata >> param_tau_y3_rdata) + {7'd0, y3_stoch_up};
wire [7:0] y3_decay_val = (y3_trace_rdata == 8'd0) ? 8'd0 :
(y3_decay_step == 8'd0) ? (y3_trace_rdata - 8'd1) :
(y3_trace_rdata - y3_decay_step);
integer pi;
initial begin
for (pi = 0; pi < 32; pi = pi + 1) begin
axon_cfg_regs[pi] = 12'd0;
end
end
localparam FIFO_WIDTH = NEURON_BITS + 8;
reg fifo_sel;
reg fifo_a_push, fifo_a_pop, fifo_a_clear;
reg [FIFO_WIDTH-1:0] fifo_a_push_data_reg;
wire [FIFO_WIDTH-1:0] fifo_a_pop_data;
wire fifo_a_empty, fifo_a_full;
spike_fifo #(.ID_WIDTH(FIFO_WIDTH), .DEPTH(64), .PTR_BITS(6)) fifo_a (
.clk(clk), .rst_n(rst_n), .clear(fifo_a_clear),
.push(fifo_a_push), .push_data(fifo_a_push_data_reg),
.pop(fifo_a_pop), .pop_data(fifo_a_pop_data),
.empty(fifo_a_empty), .full(fifo_a_full), .count()
);
reg fifo_b_push, fifo_b_pop, fifo_b_clear;
reg [FIFO_WIDTH-1:0] fifo_b_push_data_reg;
wire [FIFO_WIDTH-1:0] fifo_b_pop_data;
wire fifo_b_empty, fifo_b_full;
spike_fifo #(.ID_WIDTH(FIFO_WIDTH), .DEPTH(64), .PTR_BITS(6)) fifo_b (
.clk(clk), .rst_n(rst_n), .clear(fifo_b_clear),
.push(fifo_b_push), .push_data(fifo_b_push_data_reg),
.pop(fifo_b_pop), .pop_data(fifo_b_pop_data),
.empty(fifo_b_empty), .full(fifo_b_full), .count()
);
wire prev_fifo_empty = fifo_sel ? fifo_b_empty : fifo_a_empty;
wire [FIFO_WIDTH-1:0] prev_fifo_data = fifo_sel ? fifo_b_pop_data : fifo_a_pop_data;
wire curr_fifo_full = fifo_sel ? fifo_a_full : fifo_b_full;
reg [NEURON_BITS:0] proc_neuron;
reg [NEURON_BITS-1:0] curr_spike_src;
reg [7:0] curr_spike_payload;
reg [POOL_ADDR_BITS-1:0] curr_base_addr;
reg [COUNT_BITS-1:0] curr_count;
reg [COUNT_BITS-1:0] conn_idx;
reg signed [NEURON_WIDTH-1:0] proc_potential;
reg signed [NEURON_WIDTH-1:0] proc_current;
reg [7:0] proc_refrac;
reg signed [DATA_WIDTH-1:0] proc_input;
reg [NEURON_BITS-1:0] saved_target;
reg signed [DATA_WIDTH-1:0] saved_weight;
reg [COMPARTMENT_BITS-1:0] saved_comp;
reg proc_spiked_this_neuron;
reg signed [NEURON_WIDTH-1:0] spike_contribution;
reg [NEURON_BITS-1:0] saved_parent_ptr;
reg [1:0] curr_format;
reg [NEURON_BITS-1:0] base_target;
reg signed [DATA_WIDTH-1:0] shared_weight;
reg [COMPARTMENT_BITS-1:0] shared_comp;
reg pack_active;
reg [3:0] pack_shift;
reg [3:0] pack_nwb;
reg [POOL_ADDR_BITS:0] pool_used_count;
reg [POOL_ADDR_BITS:0] elig_scan_addr;
reg learn_mode;
reg [NEURON_BITS:0] learn_neuron;
reg [COUNT_BITS-1:0] learn_slot;
reg [POOL_ADDR_BITS-1:0] learn_base_addr;
reg [COUNT_BITS-1:0] learn_count;
reg learn_rev_valid;
reg [NEURON_BITS-1:0] learn_rev_src;
reg [POOL_ADDR_BITS-1:0] learn_rev_pool_addr;
reg [NUM_NEURONS-1:0] spike_bitmap;
wire [3:0] mc_opcode = ucode_rdata[31:28];
wire [3:0] mc_dst = ucode_rdata[27:24];
wire [3:0] mc_src_a = ucode_rdata[23:20];
wire [3:0] mc_src_b = ucode_rdata[19:16];
wire [2:0] mc_shift = ucode_rdata[15:13];
wire signed [15:0] mc_imm = ucode_rdata[15:0];
wire signed [DATA_WIDTH-1:0] mc_op_a = mc_regs[mc_src_a];
wire signed [DATA_WIDTH-1:0] mc_op_b = mc_regs[mc_src_b];
wire signed [31:0] mc_mul_raw = mc_op_a * mc_op_b;
reg signed [DATA_WIDTH-1:0] mc_alu_result;
always @(*) begin
case (mc_opcode)
4'd1: mc_alu_result = mc_op_a + mc_op_b;
4'd2: mc_alu_result = mc_op_a - mc_op_b;
4'd3: mc_alu_result = mc_mul_raw >>> mc_shift;
4'd4: mc_alu_result = mc_op_a >>> mc_shift;
4'd5: mc_alu_result = mc_op_a << mc_shift;
4'd6: mc_alu_result = (mc_op_a > mc_op_b) ? mc_op_a : mc_op_b;
4'd7: mc_alu_result = (mc_op_a < mc_op_b) ? mc_op_a : mc_op_b;
4'd8: mc_alu_result = mc_imm;
default: mc_alu_result = 16'sd0;
endcase
end
wire [POOL_ADDR_BITS-1:0] learn_wr_addr =
(learn_mode == 0) ? (learn_base_addr + learn_slot) : learn_rev_pool_addr;
wire signed [31:0] reward_product = $signed(elig_rdata) * $signed(reward_trace);
wire signed [DATA_WIDTH-1:0] reward_delta = reward_product >>> REWARD_SHIFT;
wire signed [DATA_WIDTH-1:0] elig_new_wt_raw = pool_wt_rdata + reward_delta;
wire signed [DATA_WIDTH-1:0] elig_new_wt =
(elig_new_wt_raw > WEIGHT_MAX) ? WEIGHT_MAX :
(elig_new_wt_raw < WEIGHT_MIN) ? WEIGHT_MIN :
elig_new_wt_raw;
wire signed [DATA_WIDTH-1:0] elig_decay_step = elig_rdata >>> ELIG_DECAY_SHIFT;
wire signed [DATA_WIDTH-1:0] elig_decayed =
(elig_rdata > 0 && elig_decay_step == 0) ? elig_rdata - 16'sd1 :
elig_rdata - elig_decay_step;
wire [1:0] dend_parent1 = dend_parent_rdata[1:0];
wire [1:0] dend_parent2 = dend_parent_rdata[3:2];
wire [1:0] dend_parent3 = dend_parent_rdata[5:4];
wire signed [DATA_WIDTH-1:0] tree_out3 =
(dend_acc_3_rdata > dend_thr_3_rdata) ? (dend_acc_3_rdata - dend_thr_3_rdata) : 16'sd0;
wire signed [DATA_WIDTH-1:0] tree_in2 = dend_acc_2_rdata +
((dend_parent3 == 2'd2) ? tree_out3 : 16'sd0);
wire signed [DATA_WIDTH-1:0] tree_out2 =
(tree_in2 > dend_thr_2_rdata) ? (tree_in2 - dend_thr_2_rdata) : 16'sd0;
wire signed [DATA_WIDTH-1:0] tree_in1 = dend_acc_1_rdata +
((dend_parent2 == 2'd1) ? tree_out2 : 16'sd0) +
((dend_parent3 == 2'd1) ? tree_out3 : 16'sd0);
wire signed [DATA_WIDTH-1:0] tree_out1 =
(tree_in1 > dend_thr_1_rdata) ? (tree_in1 - dend_thr_1_rdata) : 16'sd0;
wire signed [DATA_WIDTH-1:0] total_dend =
((dend_parent1 == 2'd0) ? tree_out1 : 16'sd0) +
((dend_parent2 == 2'd0) ? tree_out2 : 16'sd0) +
((dend_parent3 == 2'd0) ? tree_out3 : 16'sd0);
wire signed [NEURON_WIDTH-1:0] total_input = dendritic_enable ?
(acc_rdata + $signed(total_dend)) : acc_rdata;
wire signed [NEURON_WIDTH+11:0] scale_u_product = total_input * $signed({1'b0, decay_u_rdata});
wire signed [NEURON_WIDTH-1:0] scaled_total_input = scale_u_enable ?
raz_div4096(scale_u_product) : total_input;
wire signed [NEURON_WIDTH-1:0] spike_excess = $signed(nrn_rdata[DATA_WIDTH-1:0]) + total_input - $signed(param_leak_rdata) - effective_threshold;
wire [7:0] spike_payload_val = (spike_excess > 16'sd255) ? 8'd255 :
(spike_excess < 16'sd1) ? 8'd1 : spike_excess[7:0];
wire signed [31:0] graded_weight_ext = saved_weight;
wire signed [31:0] graded_payload_ext = {24'd0, curr_spike_payload};
wire signed [31:0] graded_product = graded_weight_ext * graded_payload_ext;
wire signed [DATA_WIDTH-1:0] graded_current = graded_product >>> GRADE_SHIFT;
wire [NEURON_BITS-1:0] deliver_target =
(curr_format == FMT_SPARSE) ? pool_tgt_rdata :
(conn_idx == 0) ? pool_tgt_rdata :
(base_target + conn_idx);
wire signed [DATA_WIDTH-1:0] deliver_weight =
(curr_format == FMT_POP && conn_idx != 0) ? shared_weight : pool_wt_rdata;
wire [COMPARTMENT_BITS-1:0] deliver_comp =
(curr_format == FMT_POP && conn_idx != 0) ? shared_comp : pool_comp_rdata;
reg ext_pending;
reg [NEURON_BITS-1:0] ext_buf_id;
reg signed [DATA_WIDTH-1:0] ext_buf_current;
always @(posedge clk or negedge rst_n) begin
if (!rst_n)
ext_pending <= 0;
else if (ext_valid) begin
ext_pending <= 1;
ext_buf_id <= ext_neuron_id;
ext_buf_current <= ext_current;
end
end
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin
state <= S_IDLE;
fifo_sel <= 0;
timestep_done <= 0;
spike_out_valid <= 0;
spike_out_payload <= 0;
total_spikes <= 0;
timestep_count <= 0;
proc_neuron <= 0;
conn_idx <= 0;
curr_spike_payload <= 0;
nrn_we <= 0; ref_we <= 0; acc_we <= 0; cur_we <= 0;
pool_wt_we_r <= 0; trace_we <= 0; trace2_we <= 0;
x2_trace_we <= 0; y2_trace_we <= 0; y3_trace_we <= 0;
pool_tag_we_r <= 0; pool_delay_we_learn <= 0;
dend_acc_1_we <= 0; dend_acc_2_we <= 0; dend_acc_3_we <= 0;
fifo_a_push <= 0; fifo_a_pop <= 0; fifo_a_clear <= 0;
fifo_b_push <= 0; fifo_b_pop <= 0; fifo_b_clear <= 0;
proc_current <= 0;
spike_bitmap <= 0;
learn_mode <= 0;
learn_neuron <= 0;
learn_slot <= 0;
learn_rev_valid <= 0;
learn_rev_src <= 0;
learn_rev_pool_addr <= 0;
rev_addr <= 0;
saved_comp <= 0;
curr_format <= 0;
base_target <= 0;
shared_weight <= 0;
shared_comp <= 0;
pack_active <= 0;
pack_shift <= 0;
pack_nwb <= 0;
elig_we <= 0;
elig_addr <= 0;
elig_wdata <= 0;
elig_scan_addr <= 0;
pool_used_count <= 0;
lfsr <= NOISE_LFSR_SEED;
mc_pc <= 0;
elig_phase <= 0;
mc_regs[0] <= 0; mc_regs[1] <= 0; mc_regs[2] <= 0; mc_regs[3] <= 0;
mc_regs[4] <= 0; mc_regs[5] <= 0; mc_regs[6] <= 0; mc_regs[7] <= 0;
mc_regs[8] <= 0; mc_regs[9] <= 0; mc_regs[10] <= 0; mc_regs[11] <= 0;
mc_regs[12] <= 0; mc_regs[13] <= 0; mc_regs[14] <= 0; mc_regs[15] <= 0;
pool_addr_r <= 0;
pool_wt_wr_addr <= 0;
pool_wt_wr_data <= 0;
index_rd_addr <= 0;
curr_base_addr <= 0;
curr_count <= 0;
learn_base_addr <= 0;
learn_count <= 0;
dq_we <= 0;
dq_addr <= 0;
dq_wdata <= 0;
current_ts_mod64 <= 0;
drain_cnt <= 0;
drain_idx <= 0;
dq_cap_target <= 0;
dq_cap_current <= 0;
dq_cap_comp <= 0;
proc_spiked_this_neuron <= 0;
spike_contribution <= 0;
saved_parent_ptr <= 0;
was_idle <= 1;
any_spike_this_ts <= 0;
epoch_counter <= 0;
reward_trace <= 0;
spike_cnt_we <= 0;
spike_cnt_addr <= 0;
spike_cnt_wdata <= 0;
homeo_thr_we <= 0;
homeo_thr_wdata <= 0;
axtype_rd_addr <= 0;
spike_ts_we <= 0;
spike_ts_addr <= 0;
spike_ts_wdata <= 0;
update_pass <= 0;
timestep_within_epoch <= 0;
perf_spike_count <= 0;
perf_active_cycles <= 0;
perf_synaptic_ops <= 0;
trace_fifo_enable <= 0;
trace_wr_ptr <= 0;
trace_rd_ptr <= 0;
trace_last_popped <= 0;
end else begin
nrn_we <= 0; ref_we <= 0; acc_we <= 0; cur_we <= 0;
pool_wt_we_r <= 0; trace_we <= 0; trace2_we <= 0; elig_we <= 0;
x2_trace_we <= 0; y2_trace_we <= 0; y3_trace_we <= 0;
pool_tag_we_r <= 0; pool_delay_we_learn <= 0;
dq_we <= 0;
dend_acc_1_we <= 0; dend_acc_2_we <= 0; dend_acc_3_we <= 0;
spike_cnt_we <= 0; homeo_thr_we <= 0; spike_ts_we <= 0;
timestep_done <= 0;
spike_out_valid <= 0;
fifo_a_push <= 0; fifo_a_pop <= 0; fifo_a_clear <= 0;
fifo_b_push <= 0; fifo_b_pop <= 0; fifo_b_clear <= 0;
if (state != S_IDLE)
perf_active_cycles <= perf_active_cycles + 1;
if (param_trace_en_we)
trace_fifo_enable <= prog_param_value[0];
if (param_perf_reset_we) begin
perf_spike_count <= 0;
perf_active_cycles <= 0;
perf_synaptic_ops <= 0;
end
if (probe_active_r && probe_sid_r == 5'd22 && !trace_fifo_empty) begin
trace_last_popped <= trace_fifo_mem[trace_rd_ptr[5:0]];
trace_rd_ptr <= trace_rd_ptr + 1;
end
if (state == S_IDLE && pool_we) begin
rev_count[pool_target_in] <= rev_count[pool_target_in] + 1;
if ({1'b0, pool_addr_in} + 1 > pool_used_count)
pool_used_count <= {1'b0, pool_addr_in} + 1;
end
case (state)
S_IDLE: begin
if (ext_valid) begin
acc_we <= 1;
acc_addr <= ext_neuron_id;
acc_wdata <= ext_current;
end
if (start) begin
any_spike_this_ts <= 0;
update_pass <= 0;
state <= S_DELAY_DRAIN_INIT;
end
end
S_DELIVER_POP: begin
if (prev_fifo_empty) begin
state <= S_UPDATE_INIT;
proc_neuron <= 0;
end else begin
curr_spike_src <= prev_fifo_data[FIFO_WIDTH-1:8];
curr_spike_payload <= prev_fifo_data[7:0];
if (fifo_sel)
fifo_b_pop <= 1;
else
fifo_a_pop <= 1;
index_rd_addr <= prev_fifo_data[FIFO_WIDTH-1:8];
axtype_rd_addr <= prev_fifo_data[FIFO_WIDTH-1:8];
state <= S_DELIVER_IDX_WAIT;
end
end
S_DELIVER_IDX_WAIT: begin
state <= S_DELIVER_IDX_READ;
end
S_DELIVER_IDX_READ: begin
curr_format <= index_rdata[INDEX_WIDTH-1 -: 2];
curr_base_addr <= index_rdata[COUNT_BITS +: POOL_ADDR_BITS];
curr_count <= index_rdata[COUNT_BITS-1:0];
conn_idx <= 0;
if (index_rdata[INDEX_WIDTH-1 -: 2] == FMT_DENSE &&
axon_cfg_rdata[0] == 1'b1) begin
pack_active <= 1;
pack_nwb <= axon_cfg_rdata[11:8];
case (axon_cfg_rdata[11:8])
4'd1: pack_shift <= 4'd4;
4'd2: pack_shift <= 4'd3;
4'd4: pack_shift <= 4'd2;
4'd8: pack_shift <= 4'd1;
default: pack_active <= 0;
endcase
end else begin
pack_active <= 0;
end
if (index_rdata[COUNT_BITS-1:0] == 0) begin
state <= S_DELIVER_POP;
end else begin
pool_addr_r <= index_rdata[COUNT_BITS +: POOL_ADDR_BITS];
state <= S_DELIVER_POOL_WAIT;
end
end
S_DELIVER_POOL_WAIT: begin
state <= S_DELIVER_ADDR;
end
S_DELIVER_ADDR: begin
saved_target <= deliver_target;
saved_comp <= deliver_comp;
if (pack_active) begin : pack_extract
reg [3:0] p_sub;
reg [6:0] p_off;
case (pack_shift)
4'd4: p_sub = conn_idx[3:0];
4'd3: p_sub = conn_idx[2:0];
4'd2: p_sub = conn_idx[1:0];
4'd1: p_sub = conn_idx[0:0];
default: p_sub = 0;
endcase
p_off = p_sub * pack_nwb;
saved_weight <= (deliver_weight >> p_off);
end else begin
saved_weight <= deliver_weight;
end
acc_addr <= deliver_target;
dend_acc_1_addr <= deliver_target;
dend_acc_2_addr <= deliver_target;
dend_acc_3_addr <= deliver_target;
axtype_rd_addr <= deliver_target;
if (conn_idx == 0 && curr_format != FMT_SPARSE)
base_target <= pool_tgt_rdata;
if (conn_idx == 0 && curr_format == FMT_POP) begin
shared_weight <= pool_wt_rdata;
shared_comp <= pool_comp_rdata;
end
state <= S_DELIVER_ACC_WAIT;
end
S_DELIVER_ACC_WAIT: begin
state <= S_DELIVER_AXTYPE;
end
S_DELIVER_AXTYPE: begin
if (axon_cfg_rdata[11:8] != 4'd0) begin
begin: axtype_decompress
reg [3:0] nwb;
reg signed [3:0] wexp_s;
reg is_exc;
reg is_mixed;
reg signed [DATA_WIDTH-1:0] raw, shifted;
reg sign_bit;
reg signed [DATA_WIDTH-1:0] magnitude;
nwb = axon_cfg_rdata[11:8];
wexp_s = $signed(axon_cfg_rdata[7:4]);
is_exc = axon_cfg_rdata[2];
is_mixed = axon_cfg_rdata[1];
case (nwb)
4'd1: raw = saved_weight & 16'h0001;
4'd2: raw = saved_weight & 16'h0003;
4'd3: raw = saved_weight & 16'h0007;
4'd4: raw = saved_weight & 16'h000F;
4'd5: raw = saved_weight & 16'h001F;
4'd6: raw = saved_weight & 16'h003F;
4'd7: raw = saved_weight & 16'h007F;
4'd8: raw = saved_weight & 16'h00FF;
4'd9: raw = saved_weight & 16'h01FF;
default: raw = saved_weight;
endcase
if (is_mixed && nwb > 1) begin
sign_bit = raw[nwb-1];
case (nwb)
4'd2: magnitude = raw & 16'h0001;
4'd3: magnitude = raw & 16'h0003;
4'd4: magnitude = raw & 16'h0007;
4'd5: magnitude = raw & 16'h000F;
4'd6: magnitude = raw & 16'h001F;
4'd7: magnitude = raw & 16'h003F;
4'd8: magnitude = raw & 16'h007F;
4'd9: magnitude = raw & 16'h00FF;
default: magnitude = raw;
endcase
if (wexp_s >= 0)
shifted = magnitude << wexp_s;
else
shifted = magnitude >>> (-wexp_s);
saved_weight <= sign_bit ? (-shifted) : shifted;
end else begin
if (wexp_s >= 0)
shifted = raw << wexp_s;
else
shifted = raw >>> (-wexp_s);
saved_weight <= is_exc ? (-shifted) : shifted;
end
end
end
state <= S_DELIVER_ACC;
end
S_DELIVER_ACC: begin
perf_synaptic_ops <= perf_synaptic_ops + 1;
if (pool_delay_rdata != 0 &&
delay_count[delivery_ts] < DELAY_ENTRIES_PER_TS) begin
dq_we <= 1;
dq_addr <= {delivery_ts, delay_count[delivery_ts][DELAY_ENTRY_BITS-1:0]};
dq_wdata <= {saved_target, delivered_current, saved_comp};
delay_count[delivery_ts] <= delay_count[delivery_ts] + 1;
end else begin
case (saved_comp)
2'd0: begin
acc_we <= 1;
acc_addr <= saved_target;
acc_wdata <= graded_enable ?
(acc_rdata + graded_current) :
(acc_rdata + saved_weight);
end
2'd1: begin
dend_acc_1_we <= 1;
dend_acc_1_addr <= saved_target;
dend_acc_1_wdata <= graded_enable ?
(dend_acc_1_rdata + graded_current) :
(dend_acc_1_rdata + saved_weight);
end
2'd2: begin
dend_acc_2_we <= 1;
dend_acc_2_addr <= saved_target;
dend_acc_2_wdata <= graded_enable ?
(dend_acc_2_rdata + graded_current) :
(dend_acc_2_rdata + saved_weight);
end
2'd3: begin
dend_acc_3_we <= 1;
dend_acc_3_addr <= saved_target;
dend_acc_3_wdata <= graded_enable ?
(dend_acc_3_rdata + graded_current) :
(dend_acc_3_rdata + saved_weight);
end
endcase
end
state <= S_DELIVER_NEXT;
end
S_DELIVER_NEXT: begin
if (conn_idx < curr_count - 1) begin
conn_idx <= conn_idx + 1;
if (curr_format == FMT_POP) begin
state <= S_DELIVER_ADDR;
end else begin
if (pack_active)
pool_addr_r <= curr_base_addr + ((conn_idx + 1) >> pack_shift);
else
pool_addr_r <= pool_addr_r + 1;
state <= S_DELIVER_POOL_WAIT;
end
end else begin
state <= S_DELIVER_POP;
end
end
S_UPDATE_INIT: begin
nrn_addr <= proc_neuron[NEURON_BITS-1:0];
cur_addr <= proc_neuron[NEURON_BITS-1:0];
ref_addr <= proc_neuron[NEURON_BITS-1:0];
acc_addr <= proc_neuron[NEURON_BITS-1:0];
trace_addr <= proc_neuron[NEURON_BITS-1:0];
trace2_addr <= proc_neuron[NEURON_BITS-1:0];
x2_trace_addr <= proc_neuron[NEURON_BITS-1:0];
y2_trace_addr <= proc_neuron[NEURON_BITS-1:0];
y3_trace_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_1_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_2_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_3_addr <= proc_neuron[NEURON_BITS-1:0];
spike_cnt_addr <= proc_neuron[NEURON_BITS-1:0];
state <= S_UPDATE_READ;
end
S_UPDATE_READ: begin
nrn_addr <= proc_neuron[NEURON_BITS-1:0];
cur_addr <= proc_neuron[NEURON_BITS-1:0];
ref_addr <= proc_neuron[NEURON_BITS-1:0];
acc_addr <= proc_neuron[NEURON_BITS-1:0];
trace_addr <= proc_neuron[NEURON_BITS-1:0];
trace2_addr <= proc_neuron[NEURON_BITS-1:0];
x2_trace_addr <= proc_neuron[NEURON_BITS-1:0];
y2_trace_addr <= proc_neuron[NEURON_BITS-1:0];
y3_trace_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_1_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_2_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_3_addr <= proc_neuron[NEURON_BITS-1:0];
spike_cnt_addr <= proc_neuron[NEURON_BITS-1:0];
state <= S_UPDATE_CALC;
end
S_UPDATE_CALC: begin
proc_refrac <= ref_rdata;
proc_input <= total_input;
proc_spiked_this_neuron <= 0;
lfsr <= lfsr_next;
if (cuba_enabled) begin
proc_current <= cur_rdata - u_decay_step + scaled_total_input + noise_u_offset;
if (ref_rdata > 0) begin
proc_refrac <= ref_rdata - 1;
if (refrac_mode_rel) begin
proc_potential <= nrn_rdata - v_decay_step - bias_scaled + noise_v_offset;
end else begin
proc_potential <= $signed({{(NEURON_WIDTH-DATA_WIDTH){param_rest_rdata[DATA_WIDTH-1]}}, param_rest_rdata});
end
trace_wdata <= trace1_decay_val;
trace2_wdata <= trace2_decay_val;
x2_trace_wdata <= x2_decay_val;
y2_trace_wdata <= y2_decay_val;
y3_trace_wdata <= y3_decay_val;
end else begin
proc_potential <= nrn_rdata - v_decay_step + cur_rdata + bias_scaled + noise_v_offset;
if (nrn_rdata - v_decay_step + cur_rdata + bias_scaled + noise_v_offset >= effective_threshold) begin
proc_potential <= $signed({{(NEURON_WIDTH-DATA_WIDTH){param_rest_rdata[DATA_WIDTH-1]}}, param_rest_rdata});
proc_refrac <= param_refrac_rdata[7:0];
trace_wdata <= TRACE_MAX;
trace2_wdata <= TRACE_MAX;
x2_trace_wdata <= TRACE_MAX;
y2_trace_wdata <= TRACE_MAX;
y3_trace_wdata <= TRACE_MAX;
spike_bitmap[proc_neuron[NEURON_BITS-1:0]] <= 1;
any_spike_this_ts <= 1;
proc_spiked_this_neuron <= 1;
spike_ts_we <= 1;
spike_ts_addr <= proc_neuron[NEURON_BITS-1:0];
spike_ts_wdata <= timestep_within_epoch;
case (stackout_mode)
2'd0: spike_contribution <= effective_threshold;
2'd1: spike_contribution <= nrn_rdata;
2'd2: spike_contribution <= cur_rdata;
2'd3: spike_contribution <= acc_rdata;
endcase
if (is_root_rdata) begin
if (fifo_sel) begin
fifo_a_push <= 1;
fifo_a_push_data_reg <= {proc_neuron[NEURON_BITS-1:0], spike_payload_val};
end else begin
fifo_b_push <= 1;
fifo_b_push_data_reg <= {proc_neuron[NEURON_BITS-1:0], spike_payload_val};
end
spike_out_valid <= 1;
spike_out_id <= proc_neuron[NEURON_BITS-1:0];
spike_out_payload <= spike_payload_val;
total_spikes <= total_spikes + 1;
perf_spike_count <= perf_spike_count + 1;
if (trace_fifo_enable && !trace_fifo_full)
trace_fifo_mem[trace_wr_ptr[5:0]] <= {timestep_count[15:0], {(16-NEURON_BITS){1'b0}}, proc_neuron[NEURON_BITS-1:0]};
if (trace_fifo_enable && !trace_fifo_full)
trace_wr_ptr <= trace_wr_ptr + 1;
end
end else begin
trace_wdata <= trace1_decay_val;
trace2_wdata <= trace2_decay_val;
x2_trace_wdata <= x2_decay_val;
y2_trace_wdata <= y2_decay_val;
y3_trace_wdata <= y3_decay_val;
end
end
end else begin
proc_current <= {NEURON_WIDTH{1'b0}};
if (ref_rdata > 0) begin
proc_potential <= $signed({{(NEURON_WIDTH-DATA_WIDTH){param_rest_rdata[DATA_WIDTH-1]}}, param_rest_rdata});
proc_refrac <= ref_rdata - 1;
trace_wdata <= trace1_decay_val;
trace2_wdata <= trace2_decay_val;
x2_trace_wdata <= x2_decay_val;
y2_trace_wdata <= y2_decay_val;
y3_trace_wdata <= y3_decay_val;
end else if ($signed(nrn_rdata[DATA_WIDTH-1:0]) + total_input - param_leak_rdata >= effective_threshold) begin
proc_potential <= $signed({{(NEURON_WIDTH-DATA_WIDTH){param_rest_rdata[DATA_WIDTH-1]}}, param_rest_rdata});
proc_refrac <= param_refrac_rdata[7:0];
trace_wdata <= TRACE_MAX;
trace2_wdata <= TRACE_MAX;
x2_trace_wdata <= TRACE_MAX;
y2_trace_wdata <= TRACE_MAX;
y3_trace_wdata <= TRACE_MAX;
spike_bitmap[proc_neuron[NEURON_BITS-1:0]] <= 1;
any_spike_this_ts <= 1;
proc_spiked_this_neuron <= 1;
spike_ts_we <= 1;
spike_ts_addr <= proc_neuron[NEURON_BITS-1:0];
spike_ts_wdata <= timestep_within_epoch;
spike_contribution <= effective_threshold;
if (is_root_rdata) begin
if (fifo_sel) begin
fifo_a_push <= 1;
fifo_a_push_data_reg <= {proc_neuron[NEURON_BITS-1:0], spike_payload_val};
end else begin
fifo_b_push <= 1;
fifo_b_push_data_reg <= {proc_neuron[NEURON_BITS-1:0], spike_payload_val};
end
spike_out_valid <= 1;
spike_out_id <= proc_neuron[NEURON_BITS-1:0];
spike_out_payload <= spike_payload_val;
total_spikes <= total_spikes + 1;
perf_spike_count <= perf_spike_count + 1;
if (trace_fifo_enable && !trace_fifo_full)
trace_fifo_mem[trace_wr_ptr[5:0]] <= {timestep_count[15:0], {(16-NEURON_BITS){1'b0}}, proc_neuron[NEURON_BITS-1:0]};
if (trace_fifo_enable && !trace_fifo_full)
trace_wr_ptr <= trace_wr_ptr + 1;
end
end else if ($signed(nrn_rdata[DATA_WIDTH-1:0]) + total_input > param_leak_rdata) begin
proc_potential <= $signed({{(NEURON_WIDTH-DATA_WIDTH){1'b0}}, $signed(nrn_rdata[DATA_WIDTH-1:0]) + total_input - param_leak_rdata});
trace_wdata <= trace1_decay_val;
trace2_wdata <= trace2_decay_val;
x2_trace_wdata <= x2_decay_val;
y2_trace_wdata <= y2_decay_val;
y3_trace_wdata <= y3_decay_val;
end else begin
proc_potential <= $signed({{(NEURON_WIDTH-DATA_WIDTH){param_rest_rdata[DATA_WIDTH-1]}}, param_rest_rdata});
trace_wdata <= trace1_decay_val;
trace2_wdata <= trace2_decay_val;
x2_trace_wdata <= x2_decay_val;
y2_trace_wdata <= y2_decay_val;
y3_trace_wdata <= y3_decay_val;
end
end
if (epoch_counter == epoch_interval - 1 && homeo_target_rdata > 0) begin
if (spike_cnt_rdata > homeo_target_rdata) begin
homeo_thr_we <= 1;
homeo_thr_wdata <= (param_thr_rdata + $signed({8'd0, homeo_eta_rdata}) > THRESHOLD * 4)
? THRESHOLD * 4
: param_thr_rdata + $signed({8'd0, homeo_eta_rdata});
end else if (spike_cnt_rdata < homeo_target_rdata) begin
homeo_thr_we <= 1;
homeo_thr_wdata <= (param_thr_rdata - $signed({8'd0, homeo_eta_rdata}) < THRESHOLD / 4)
? THRESHOLD / 4
: param_thr_rdata - $signed({8'd0, homeo_eta_rdata});
end
end
saved_parent_ptr <= parent_ptr_rdata;
state <= S_UPDATE_WRITE;
end
S_UPDATE_PARENT_ADDR: begin
acc_addr <= saved_parent_ptr;
state <= S_UPDATE_PARENT_WAIT;
end
S_UPDATE_PARENT_WAIT: begin
state <= S_UPDATE_PARENT_ACC;
end
S_UPDATE_PARENT_ACC: begin
acc_we <= 1;
acc_addr <= saved_parent_ptr;
case (joinop_rdata)
2'd0:
acc_wdata <= acc_rdata + spike_contribution;
2'd1: begin
if (spike_contribution[NEURON_WIDTH-1] ?
(-spike_contribution > (acc_rdata[NEURON_WIDTH-1] ? -acc_rdata : acc_rdata)) :
(spike_contribution > (acc_rdata[NEURON_WIDTH-1] ? -acc_rdata : acc_rdata)))
acc_wdata <= spike_contribution;
else
acc_wdata <= acc_rdata;
end
2'd2:
acc_wdata <= acc_rdata | spike_contribution;
2'd3:
acc_wdata <= acc_rdata;
endcase
if (proc_neuron < NUM_NEURONS - 1) begin
proc_neuron <= proc_neuron + 1;
state <= S_UPDATE_INIT;
end else if (update_pass < num_updates - 1) begin
update_pass <= update_pass + 1;
proc_neuron <= 0;
state <= S_UPDATE_INIT;
end else begin
if (skip_idle_enable && !any_spike_this_ts) begin
state <= S_DONE;
end else if (learn_enable && epoch_counter == 0) begin
learn_neuron <= 0;
learn_mode <= 0;
state <= S_LEARN_MC_SCAN;
end else if (threefactor_enable && epoch_counter == 0) begin
elig_scan_addr <= 0;
elig_phase <= 0;
state <= S_ELIG_MC;
end else begin
state <= S_DONE;
end
end
end
S_UPDATE_WRITE: begin
nrn_we <= 1;
nrn_addr <= proc_neuron[NEURON_BITS-1:0];
nrn_wdata <= (proc_potential < vmin_ext) ? vmin_ext :
(proc_potential > vmax_ext) ? vmax_ext : proc_potential;
cur_we <= 1;
cur_addr <= proc_neuron[NEURON_BITS-1:0];
cur_wdata <= proc_current;
ref_we <= 1;
ref_addr <= proc_neuron[NEURON_BITS-1:0];
ref_wdata <= proc_refrac;
acc_we <= 1;
acc_addr <= proc_neuron[NEURON_BITS-1:0];
acc_wdata <= 0;
dend_acc_1_we <= 1;
dend_acc_1_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_1_wdata <= 0;
dend_acc_2_we <= 1;
dend_acc_2_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_2_wdata <= 0;
dend_acc_3_we <= 1;
dend_acc_3_addr <= proc_neuron[NEURON_BITS-1:0];
dend_acc_3_wdata <= 0;
trace_we <= 1;
trace_addr <= proc_neuron[NEURON_BITS-1:0];
trace2_we <= 1;
trace2_addr <= proc_neuron[NEURON_BITS-1:0];
x2_trace_we <= 1;
x2_trace_addr <= proc_neuron[NEURON_BITS-1:0];
y2_trace_we <= 1;
y2_trace_addr <= proc_neuron[NEURON_BITS-1:0];
y3_trace_we <= 1;
y3_trace_addr <= proc_neuron[NEURON_BITS-1:0];
spike_cnt_addr <= proc_neuron[NEURON_BITS-1:0];
if (epoch_counter == epoch_interval - 1) begin
spike_cnt_we <= 1;
spike_cnt_wdata <= spike_bitmap[proc_neuron[NEURON_BITS-1:0]] ? 8'd1 : 8'd0;
end else if (spike_bitmap[proc_neuron[NEURON_BITS-1:0]]) begin
spike_cnt_we <= 1;
spike_cnt_wdata <= spike_cnt_rdata + 8'd1;
end
if (proc_spiked_this_neuron && saved_parent_ptr != {NEURON_BITS{1'b1}}) begin
state <= S_UPDATE_PARENT_ADDR;
end else if (proc_neuron < NUM_NEURONS - 1) begin
proc_neuron <= proc_neuron + 1;
state <= S_UPDATE_INIT;
end else if (update_pass < num_updates - 1) begin
update_pass <= update_pass + 1;
proc_neuron <= 0;
state <= S_UPDATE_INIT;
end else begin
if (skip_idle_enable && !any_spike_this_ts) begin
state <= S_DONE;
end else if (learn_enable && epoch_counter == 0) begin
learn_neuron <= 0;
learn_mode <= 0;
state <= S_LEARN_MC_SCAN;
end else if (threefactor_enable && epoch_counter == 0) begin
elig_scan_addr <= 0;
elig_phase <= 0;
state <= S_ELIG_MC;
end else begin
state <= S_DONE;
end
end
end
S_LEARN_MC_SCAN: begin
if (learn_neuron == NUM_NEURONS) begin
if (learn_mode == 0) begin
learn_mode <= 1;
learn_neuron <= 0;
end else begin
if (threefactor_enable) begin
elig_scan_addr <= 0;
elig_phase <= 0;
state <= S_ELIG_MC;
end else begin
state <= S_DONE;
end
end
end else if (spike_bitmap[learn_neuron[NEURON_BITS-1:0]]) begin
learn_slot <= 0;
if (learn_mode == 0) begin
index_rd_addr <= learn_neuron[NEURON_BITS-1:0];
state <= S_LEARN_MC_IDX_WAIT;
end else begin
state <= S_LEARN_MC_SETUP;
end
end else begin
learn_neuron <= learn_neuron + 1;
end
end
S_LEARN_MC_IDX_WAIT: begin
state <= S_LEARN_MC_IDX_READ;
end
S_LEARN_MC_IDX_READ: begin
learn_base_addr <= index_rdata[COUNT_BITS +: POOL_ADDR_BITS];
learn_count <= index_rdata[COUNT_BITS-1:0];
if (index_rdata[COUNT_BITS-1:0] == 0 ||
index_rdata[INDEX_WIDTH-1 -: 2] != FMT_SPARSE) begin
learn_neuron <= learn_neuron + 1;
state <= S_LEARN_MC_SCAN;
end else begin
state <= S_LEARN_MC_SETUP;
end
end
S_LEARN_MC_SETUP: begin
if (learn_mode == 0) begin
pool_addr_r <= learn_base_addr + learn_slot;
elig_addr <= learn_base_addr + learn_slot;
end else begin
rev_addr <= {learn_neuron[NEURON_BITS-1:0], learn_slot[REV_SLOT_BITS-1:0]};
end
state <= S_LEARN_MC_WAIT1;
end
S_LEARN_MC_WAIT1: begin
state <= S_LEARN_MC_LOAD;
end
S_LEARN_MC_LOAD: begin
if (learn_mode == 0) begin
trace_addr <= pool_tgt_rdata;
trace2_addr <= pool_tgt_rdata;
x2_trace_addr <= pool_tgt_rdata;
y2_trace_addr <= pool_tgt_rdata;
y3_trace_addr <= pool_tgt_rdata;
spike_ts_addr <= pool_tgt_rdata;
end else begin
learn_rev_valid <= rev_rdata[REV_DATA_W-1];
learn_rev_src <= rev_rdata[POOL_ADDR_BITS +: NEURON_BITS];
learn_rev_pool_addr <= rev_rdata[POOL_ADDR_BITS-1:0];
trace_addr <= rev_rdata[POOL_ADDR_BITS +: NEURON_BITS];
trace2_addr <= rev_rdata[POOL_ADDR_BITS +: NEURON_BITS];
x2_trace_addr <= rev_rdata[POOL_ADDR_BITS +: NEURON_BITS];
y2_trace_addr <= rev_rdata[POOL_ADDR_BITS +: NEURON_BITS];
y3_trace_addr <= rev_rdata[POOL_ADDR_BITS +: NEURON_BITS];
pool_addr_r <= rev_rdata[POOL_ADDR_BITS-1:0];
elig_addr <= rev_rdata[POOL_ADDR_BITS-1:0];
spike_ts_addr <= rev_rdata[POOL_ADDR_BITS +: NEURON_BITS];
end
state <= S_LEARN_MC_WAIT2;
end
S_LEARN_MC_WAIT2: begin
state <= S_LEARN_MC_REGLD;
end
S_LEARN_MC_REGLD: begin
if (learn_mode == 1 && !learn_rev_valid) begin
state <= S_LEARN_MC_NEXT;
end else begin
mc_regs[0] <= $signed({8'd0, trace_rdata});
mc_regs[1] <= $signed({8'd0, x2_trace_rdata});
mc_regs[2] <= $signed({8'd0, trace2_rdata});
mc_regs[3] <= $signed({8'd0, y2_trace_rdata});
mc_regs[4] <= $signed({8'd0, y3_trace_rdata});
mc_regs[5] <= pool_wt_rdata;
mc_regs[6] <= $signed({10'd0, pool_delay_rdata});
mc_regs[7] <= pool_tag_rdata;
mc_regs[8] <= elig_rdata;
mc_regs[9] <= reward_trace;
mc_regs[10] <= $signed({8'd0, spike_ts_rdata});
mc_regs[11] <= 16'sd0;
mc_regs[12] <= 16'sd0;
mc_regs[13] <= 16'sd0;
mc_regs[14] <= 16'sd0;
mc_regs[15] <= 16'sd0;
mc_pc <= {threefactor_enable, learn_mode, 6'd0};
state <= S_LEARN_MC_FETCH;
end
end
S_LEARN_MC_FETCH: begin
state <= S_LEARN_MC_EXEC;
end
S_LEARN_MC_EXEC: begin
case (mc_opcode)
4'd0: begin
mc_pc <= mc_pc + 1;
state <= S_LEARN_MC_FETCH;
end
4'd1, 4'd2, 4'd3, 4'd4, 4'd5, 4'd6, 4'd7, 4'd8: begin
mc_regs[mc_dst] <= mc_alu_result;
mc_pc <= mc_pc + 1;
state <= S_LEARN_MC_FETCH;
end
4'd9: begin
pool_wt_we_r <= 1;
pool_wt_wr_addr <= learn_wr_addr;
pool_wt_wr_data <= mc_regs[5] + {{15{1'b0}}, lfsr[0]};
mc_pc <= mc_pc + 1;
state <= S_LEARN_MC_FETCH;
end
4'd10: begin
elig_we <= 1;
elig_addr <= learn_wr_addr;
elig_wdata <= mc_regs[8];
mc_pc <= mc_pc + 1;
state <= S_LEARN_MC_FETCH;
end
4'd11: begin
mc_pc <= (mc_regs[mc_src_a] == 0) ? (mc_pc + 2) : (mc_pc + 1);
state <= S_LEARN_MC_FETCH;
end
4'd12: begin
mc_pc <= (mc_regs[mc_src_a] != 0) ? (mc_pc + 2) : (mc_pc + 1);
state <= S_LEARN_MC_FETCH;
end
4'd13: begin
state <= S_LEARN_MC_NEXT;
end
4'd14: begin
pool_delay_we_learn <= 1;
pool_delay_learn_addr <= learn_wr_addr;
pool_delay_learn_data <= mc_regs[6][5:0];
mc_pc <= mc_pc + 1;
state <= S_LEARN_MC_FETCH;
end
4'd15: begin
pool_tag_we_r <= 1;
pool_tag_wr_addr <= learn_wr_addr;
pool_tag_wr_data <= mc_regs[7];
mc_pc <= mc_pc + 1;
state <= S_LEARN_MC_FETCH;
end
default: begin
mc_pc <= mc_pc + 1;
state <= S_LEARN_MC_FETCH;
end
endcase
end
S_LEARN_MC_NEXT: begin
if (learn_mode == 0) begin
if (learn_slot < learn_count - 1) begin
learn_slot <= learn_slot + 1;
state <= S_LEARN_MC_SETUP;
end else begin
learn_neuron <= learn_neuron + 1;
state <= S_LEARN_MC_SCAN;
end
end else begin
if (learn_slot < REV_FANIN - 1) begin
learn_slot <= learn_slot + 1;
state <= S_LEARN_MC_SETUP;
end else begin
learn_neuron <= learn_neuron + 1;
state <= S_LEARN_MC_SCAN;
end
end
end
S_ELIG_MC: begin
case (elig_phase)
2'd0: begin
if (elig_scan_addr >= pool_used_count) begin
state <= S_DONE;
end else begin
pool_addr_r <= elig_scan_addr[POOL_ADDR_BITS-1:0];
elig_addr <= elig_scan_addr[POOL_ADDR_BITS-1:0];
elig_phase <= 2'd1;
end
end
2'd1: begin
elig_phase <= 2'd2;
end
2'd2: begin
if (reward_trace != 0) begin
pool_wt_we_r <= 1;
pool_wt_wr_addr <= elig_scan_addr[POOL_ADDR_BITS-1:0];
pool_wt_wr_data <= elig_new_wt;
end
elig_we <= 1;
elig_wdata <= elig_decayed;
elig_scan_addr <= elig_scan_addr + 1;
elig_phase <= 2'd0;
end
default: elig_phase <= 2'd0;
endcase
end
S_DELAY_DRAIN_INIT: begin
drain_cnt <= delay_count[current_ts_mod64];
drain_idx <= 0;
if (delay_count[current_ts_mod64] == 0) begin
state <= S_DELIVER_POP;
end else begin
dq_addr <= {current_ts_mod64, {DELAY_ENTRY_BITS{1'b0}}};
state <= S_DELAY_DRAIN_QWAIT;
end
end
S_DELAY_DRAIN_QWAIT: begin
state <= S_DELAY_DRAIN_CAP;
end
S_DELAY_DRAIN_CAP: begin
dq_cap_target <= dq_rdata[DELAY_QUEUE_ENTRY_W-1 -: NEURON_BITS];
dq_cap_current <= dq_rdata[COMPARTMENT_BITS +: DATA_WIDTH];
dq_cap_comp <= dq_rdata[COMPARTMENT_BITS-1:0];
acc_addr <= dq_rdata[DELAY_QUEUE_ENTRY_W-1 -: NEURON_BITS];
dend_acc_1_addr <= dq_rdata[DELAY_QUEUE_ENTRY_W-1 -: NEURON_BITS];
dend_acc_2_addr <= dq_rdata[DELAY_QUEUE_ENTRY_W-1 -: NEURON_BITS];
dend_acc_3_addr <= dq_rdata[DELAY_QUEUE_ENTRY_W-1 -: NEURON_BITS];
state <= S_DELAY_DRAIN_AWAIT;
end
S_DELAY_DRAIN_AWAIT: begin
state <= S_DELAY_DRAIN_ACC;
end
S_DELAY_DRAIN_ACC: begin
case (dq_cap_comp)
2'd0: begin
acc_we <= 1;
acc_addr <= dq_cap_target;
acc_wdata <= acc_rdata + dq_cap_current;
end
2'd1: begin
dend_acc_1_we <= 1;
dend_acc_1_addr <= dq_cap_target;
dend_acc_1_wdata <= dend_acc_1_rdata + dq_cap_current;
end
2'd2: begin
dend_acc_2_we <= 1;
dend_acc_2_addr <= dq_cap_target;
dend_acc_2_wdata <= dend_acc_2_rdata + dq_cap_current;
end
2'd3: begin
dend_acc_3_we <= 1;
dend_acc_3_addr <= dq_cap_target;
dend_acc_3_wdata <= dend_acc_3_rdata + dq_cap_current;
end
endcase
if (drain_idx < drain_cnt - 1) begin
drain_idx <= drain_idx + 1;
dq_addr <= {current_ts_mod64, drain_idx + {{(DELAY_ENTRY_BITS-1){1'b0}}, 1'b1}};
state <= S_DELAY_DRAIN_QWAIT;
end else begin
delay_count[current_ts_mod64] <= 0;
state <= S_DELIVER_POP;
end
end
S_DONE: begin
fifo_sel <= ~fifo_sel;
if (fifo_sel)
fifo_b_clear <= 1;
else
fifo_a_clear <= 1;
timestep_done <= 1;
timestep_count <= timestep_count + 1;
current_ts_mod64 <= current_ts_mod64 + 1;
proc_neuron <= 0;
spike_bitmap <= 0;
epoch_counter <= (epoch_counter >= epoch_interval - 1) ? 8'd0 : epoch_counter + 8'd1;
timestep_within_epoch <= (epoch_counter >= epoch_interval - 1) ?
8'd0 : timestep_within_epoch + 8'd1;
was_idle <= ~any_spike_this_ts;
reward_trace <= rt_decayed + reward_value;
state <= S_IDLE;
end
default: state <= S_IDLE;
endcase
end
end
`ifdef SIMULATION
integer sim_init_i;
initial begin
for (sim_init_i = 0; sim_init_i < NUM_NEURONS; sim_init_i = sim_init_i + 1) begin
is_root_mem.mem[sim_init_i] = 1'b1;
end
for (sim_init_i = 0; sim_init_i < NUM_NEURONS; sim_init_i = sim_init_i + 1) begin
threshold_mem.mem[sim_init_i] = THRESHOLD;
leak_mem.mem[sim_init_i] = LEAK_RATE;
rest_mem.mem[sim_init_i] = RESTING_POT;
refrac_cfg_mem.mem[sim_init_i] = REFRAC_CYCLES;
vmin_mem.mem[sim_init_i] = 16'sh8000;
vmax_mem.mem[sim_init_i] = 16'sh7FFF;
tau1_cfg_mem.mem[sim_init_i] = TAU1_DEFAULT;
tau2_cfg_mem.mem[sim_init_i] = TAU2_DEFAULT;
parent_ptr_mem.mem[sim_init_i] = {NEURON_BITS{1'b1}};
end
ucode_mem.mem[0] = 32'hC000_0000;
ucode_mem.mem[1] = 32'hD000_0000;
ucode_mem.mem[2] = 32'h4B00_6000;
ucode_mem.mem[3] = 32'h255B_0000;
ucode_mem.mem[4] = 32'h8B00_0000;
ucode_mem.mem[5] = 32'h655B_0000;
ucode_mem.mem[6] = 32'h8B00_07D0;
ucode_mem.mem[7] = 32'h755B_0000;
ucode_mem.mem[8] = 32'h9000_0000;
ucode_mem.mem[9] = 32'hD000_0000;
ucode_mem.mem[64] = 32'hC000_0000;
ucode_mem.mem[65] = 32'hD000_0000;
ucode_mem.mem[66] = 32'h4B00_6000;
ucode_mem.mem[67] = 32'h155B_0000;
ucode_mem.mem[68] = 32'h8B00_0000;
ucode_mem.mem[69] = 32'h655B_0000;
ucode_mem.mem[70] = 32'h8B00_07D0;
ucode_mem.mem[71] = 32'h755B_0000;
ucode_mem.mem[72] = 32'h9000_0000;
ucode_mem.mem[73] = 32'hD000_0000;
ucode_mem.mem[128] = 32'hC000_0000;
ucode_mem.mem[129] = 32'hD000_0000;
ucode_mem.mem[130] = 32'h4B00_6000;
ucode_mem.mem[131] = 32'h288B_0000;
ucode_mem.mem[132] = 32'h8B00_FC18;
ucode_mem.mem[133] = 32'h688B_0000;
ucode_mem.mem[134] = 32'h8B00_03E8;
ucode_mem.mem[135] = 32'h788B_0000;
ucode_mem.mem[136] = 32'hA000_0000;
ucode_mem.mem[137] = 32'hD000_0000;
ucode_mem.mem[192] = 32'hC000_0000;
ucode_mem.mem[193] = 32'hD000_0000;
ucode_mem.mem[194] = 32'h4B00_6000;
ucode_mem.mem[195] = 32'h188B_0000;
ucode_mem.mem[196] = 32'h8B00_FC18;
ucode_mem.mem[197] = 32'h688B_0000;
ucode_mem.mem[198] = 32'h8B00_03E8;
ucode_mem.mem[199] = 32'h788B_0000;
ucode_mem.mem[200] = 32'hA000_0000;
ucode_mem.mem[201] = 32'hD000_0000;
end
`endif
endmodule