# Digital Signal Processing with Field Programmable Gate Arrays

Chapter02 - Home
Exercises: 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30

Exercise. This is the solution to exercise 2.1 in the book.

Solution. (a)

$n=1$, no adder is needed, just an AND gate. Hence no layer of CSAs is needed.

$n=2$, requires 4 AND gates and 2 half adders. Since a CSA is essentially a full adder, no CSA layer is needed in this case.

$n=3$, requires 9 AND gates, 1 layer of CSAs and the final n-bit ripple-carry adder.

$n=4$, requires 16 AND gates, 2 layers of CSAs and the final n-bit ripple-carry adder.

$n=5$, requires 25 AND gates, 3 layers of CSAs and the final n-bit ripple-carry adder.

For $n=6$ and higher we’re going to use a more simplified notation:

and thus have:

For $n=7$:

For $n=8$:

For $n=12$:

We get the following number of levels and resources used (a final 2n ripple carry adder is always needed):

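| $n$ | Levels | HAs | CSAs |
|-----|--------|-----|------|
| 0-2 | 0      | 2   | 0    |
| 3   | 1      | 2   | 1    |
| 4   | 2      | 3   | 5    |
| 5   | 3      | 4   | 11   |
| 6   | 4      | 5   | 19   |
| 7   | 5      | 5   | 29   |
| 8   | 6      | 8   | 41   |
| 12  | 10     | 22  | 68   |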

(b) Notice that the 3:2 carry-save adder can be implemented using a full adder by treating the carry-in as the additional input and the carry-out as an output. The Wallace tree multiplier is faster than the array multiplier (where the carry is propagated within the same layer of full adders rather than to the layer below) because it reduces the gate delay of the longest path. However, the routing between the various CSA units is more complex in a Wallace tree multiplier than in an array multiplier. Since routing resources are valuable in FPGAs (especially in congested designs), Wallace trees are less preferred in spite of their improved gate delay.

(c) For the $8×8$ bit multiplier we can implement the Wallace tree and the pipelined ripple carry adder using the above diagram:

// synthesis translate_off
// Self-checking testbench for the 8x8 Wallace tree multiplier.
// A new random operand pair is applied every 4 clocks (2-bit counter wrap);
// at that moment `mult` is compared against the product of the operands
// that have been held for the previous 4 clocks.
module tb;

reg clk;
reg reset;

// 100 MHz clk
always begin
    clk = 1'b0;
    #5;
    clk = 1'b1;
    #5;
end

// hold reset for 10 rising clock edges
initial begin
    reset = 1'b1;
    repeat (10) @(posedge clk);
    reset = 1'b0;
end

reg [7:0] a, b;
reg [1:0] counter;
wire [15:0] mult;

always @(posedge clk or posedge reset) begin
    if (reset) begin
        a <= 'b0;
        b <= 'b0;
        counter <= 'b0;
    end else begin
        counter <= counter+1;
        if (counter==0) begin
            // non-blocking: the compare below still sees the previous a/b
            a <= $random;
            b <= $random;
            if (mult != a*b)
                $stop;
        end
    end
end

wallace_mult_8bit i_wallace_mult(
    .clk(clk),
    .reset(reset),
    .a(a),
    .b(b),
    .mult(mult)
);

endmodule
// synthesis translate_on

// 8x8 unsigned multiplier: Wallace tree of half adders / 3:2 carry-save
// adders followed by a ripple-carry adder.
//
// Pipeline (3 register stages from a/b to mult):
//   1. a_ff / b_ff            - registered inputs
//   2. wallace_*_ff           - registered Wallace tree outputs
//   3. mult_ff                - registered final product
//
// The tree wiring below is hard-coded for 8-bit operands (fixed ab[..][..]
// indices), so N is expected to stay at its default of 8.
// All sum_*/carry_* nets are implicitly declared 1-bit wires created by the
// instance port connections.
module wallace_mult_8bit #(
    parameter N=8
) (
    input clk,
    input reset,

    input [N-1:0] a,
    input [N-1:0] b,

    output [2*N-1:0] mult
);

reg [N-1:0] a_ff;
reg [N-1:0] b_ff;

// bus containing P(0),...P(N-2)
wire[N-2:0] wallace_out;
reg [N-2:0] wallace_out_ff;
// ffs to the ripple carry adder
reg [N-1:0] wallace_a_ff;
reg [N-1:0] wallace_b_ff;
// ffs output
reg [2*N-1:0] mult_ff;

// register the inputs for timing
always @(posedge clk or posedge reset) begin
    if (reset) begin
        a_ff <= 'b0;
        b_ff <= 'b0;
    end else begin
        a_ff <= a;
        b_ff <= b;
    end
end

// partial products: ab[i][j] = a_ff[i] & b_ff[j], weight 2^(i+j)
wire [N-1:0] ab [0:N-1];

genvar i;
generate
    for (i=0; i<N; i=i+1) begin : gen_and
        assign ab[i]=b_ff&{N{a_ff[i]}};
    end
endgenerate

assign wallace_out[0] = ab[0][0];

// Level 1
// notation is ha/csa_(level)_(column)_(item)
ha  i_ha_1_1_0  ( .a(ab[1][0]), .b(ab[0][1]), .sum(wallace_out[1]), .cout(carry_1_1_0) );
csa i_csa_1_2_0 ( .a(ab[2][0]), .b(ab[1][1]), .c(ab[0][2]), .sum(sum_1_2_0), .cout(carry_1_2_0) );
csa i_csa_1_3_0 ( .a(ab[3][0]), .b(ab[2][1]), .c(ab[1][2]), .sum(sum_1_3_0), .cout(carry_1_3_0) );
csa i_csa_1_4_0 ( .a(ab[4][0]), .b(ab[3][1]), .c(ab[2][2]), .sum(sum_1_4_0), .cout(carry_1_4_0) );
csa i_csa_1_5_0 ( .a(ab[5][0]), .b(ab[4][1]), .c(ab[3][2]), .sum(sum_1_5_0), .cout(carry_1_5_0) );
csa i_csa_1_5_1 ( .a(ab[2][3]), .b(ab[1][4]), .c(ab[0][5]), .sum(sum_1_5_1), .cout(carry_1_5_1) );
csa i_csa_1_6_0 ( .a(ab[6][0]), .b(ab[5][1]), .c(ab[4][2]), .sum(sum_1_6_0), .cout(carry_1_6_0) );
csa i_csa_1_6_1 ( .a(ab[3][3]), .b(ab[2][4]), .c(ab[1][5]), .sum(sum_1_6_1), .cout(carry_1_6_1) );
csa i_csa_1_7_0 ( .a(ab[7][0]), .b(ab[6][1]), .c(ab[5][2]), .sum(sum_1_7_0), .cout(carry_1_7_0) );
csa i_csa_1_7_1 ( .a(ab[4][3]), .b(ab[3][4]), .c(ab[2][5]), .sum(sum_1_7_1), .cout(carry_1_7_1) );
csa i_csa_1_8_0 ( .a(ab[7][1]), .b(ab[6][2]), .c(ab[5][3]), .sum(sum_1_8_0), .cout(carry_1_8_0) );
csa i_csa_1_8_1 ( .a(ab[4][4]), .b(ab[3][5]), .c(ab[2][6]), .sum(sum_1_8_1), .cout(carry_1_8_1) );
csa i_csa_1_9_0 ( .a(ab[7][2]), .b(ab[6][3]), .c(ab[5][4]), .sum(sum_1_9_0), .cout(carry_1_9_0) );
csa i_csa_1_9_1 ( .a(ab[4][5]), .b(ab[3][6]), .c(ab[2][7]), .sum(sum_1_9_1), .cout(carry_1_9_1) );
csa i_csa_1_10_0( .a(ab[7][3]), .b(ab[6][4]), .c(ab[5][5]), .sum(sum_1_10_0), .cout(carry_1_10_0) );
csa i_csa_1_11_0( .a(ab[7][4]), .b(ab[6][5]), .c(ab[5][6]), .sum(sum_1_11_0), .cout(carry_1_11_0) );
csa i_csa_1_12_0( .a(ab[7][5]), .b(ab[6][6]), .c(ab[5][7]), .sum(sum_1_12_0), .cout(carry_1_12_0) );

// Level 2
// notation is ha/csa_(level)_(column)_(item)
ha  i_ha_2_2_0  ( .a(sum_1_2_0), .b(carry_1_1_0), .sum(wallace_out[2]), .cout(carry_2_2_0) );
csa i_csa_2_3_0 ( .a(ab[0][3]), .b(carry_1_2_0), .c(sum_1_3_0), .sum(sum_2_3_0), .cout(carry_2_3_0) );
csa i_csa_2_4_0 ( .a(ab[1][3]), .b(carry_1_3_0), .c(sum_1_4_0), .sum(sum_2_4_0), .cout(carry_2_4_0) );
csa i_csa_2_5_0 ( .a(carry_1_4_0), .b(sum_1_5_0), .c(sum_1_5_1), .sum(sum_2_5_0), .cout(carry_2_5_0) );
csa i_csa_2_6_0 ( .a(carry_1_5_0), .b(sum_1_6_0), .c(sum_1_6_1), .sum(sum_2_6_0), .cout(carry_2_6_0) );
csa i_csa_2_7_0 ( .a(sum_1_7_0), .b(sum_1_7_1), .c(carry_1_6_0), .sum(sum_2_7_0), .cout(carry_2_7_0) );
csa i_csa_2_7_1 ( .a(carry_1_6_1), .b(ab[1][6]), .c(ab[0][7]), .sum(sum_2_7_1), .cout(carry_2_7_1) );
csa i_csa_2_8_0 ( .a(sum_1_8_0), .b(sum_1_8_1), .c(carry_1_7_0), .sum(sum_2_8_0), .cout(carry_2_8_0) );
csa i_csa_2_9_0 ( .a(sum_1_9_0), .b(sum_1_9_1), .c(carry_1_8_0), .sum(sum_2_9_0), .cout(carry_2_9_0) );
csa i_csa_2_10_0( .a(sum_1_10_0), .b(carry_1_9_0), .c(carry_1_9_1), .sum(sum_2_10_0), .cout(carry_2_10_0) );

// Level 3
// notation is ha/csa_(level)_(column)_(item)
ha  i_ha_3_3_0  ( .a(sum_2_3_0), .b(carry_2_2_0), .sum(wallace_out[3]), .cout(carry_3_3_0) );
csa i_csa_3_4_0 ( .a(ab[0][4]), .b(sum_2_4_0), .c(carry_2_3_0), .sum(sum_3_4_0), .cout(carry_3_4_0) );
csa i_csa_3_6_0 ( .a(sum_2_6_0), .b(carry_1_5_1), .c(carry_2_5_0), .sum(sum_3_6_0), .cout(carry_3_6_0) );
csa i_csa_3_7_0 ( .a(sum_2_7_0), .b(sum_2_7_1), .c(carry_2_6_0), .sum(sum_3_7_0), .cout(carry_3_7_0) );
csa i_csa_3_8_0 ( .a(sum_2_8_0), .b(carry_2_7_0), .c(carry_1_7_1), .sum(sum_3_8_0), .cout(carry_3_8_0) );
csa i_csa_3_9_0 ( .a(sum_2_9_0), .b(carry_2_8_0), .c(carry_1_8_1), .sum(sum_3_9_0), .cout(carry_3_9_0) );
csa i_csa_3_10_0( .a(sum_2_10_0), .b(carry_2_9_0), .c(ab[4][6]), .sum(sum_3_10_0), .cout(carry_3_10_0) );
csa i_csa_3_11_0( .a(sum_1_11_0), .b(carry_2_10_0), .c(carry_1_10_0), .sum(sum_3_11_0), .cout(carry_3_11_0) );

// Level 4
// notation is ha/csa_(level)_(column)_(item)
ha  i_ha_4_4_0  ( .a(sum_3_4_0), .b(carry_3_3_0), .sum(wallace_out[4]), .cout(carry_4_4_0) );
csa i_csa_4_5_0 ( .a(sum_2_5_0), .b(carry_2_4_0), .c(carry_3_4_0), .sum(sum_4_5_0), .cout(carry_4_5_0) );
csa i_csa_4_8_0 ( .a(sum_3_8_0), .b(carry_3_7_0), .c(carry_2_7_1), .sum(sum_4_8_0), .cout(carry_4_8_0) );

// Level 5
// notation is ha/csa_(level)_(column)_(item)
ha  i_ha_5_5_0  ( .a(sum_4_5_0), .b(carry_4_4_0), .sum(wallace_out[5]), .cout(carry_5_5_0) );
csa i_csa_5_6_0 ( .a(sum_3_6_0), .b(ab[0][6]), .c(carry_4_5_0), .sum(sum_5_6_0), .cout(carry_5_6_0) );
ha  i_ha_5_7_0  ( .a(sum_3_7_0), .b(carry_3_6_0), .sum(sum_5_7_0), .cout(carry_5_7_0) );

// Level 6
// notation is ha/csa_(level)_(column)_(item)
ha  i_ha_6_6_0  ( .a(sum_5_6_0), .b(carry_5_5_0), .sum(wallace_out[6]), .cout(carry_6_6_0) );
ha  i_ha_6_7_0  ( .a(sum_5_7_0), .b(carry_5_6_0), .sum(sum_6_7_0), .cout(carry_6_7_0) );
csa i_csa_6_8_0 ( .a(sum_4_8_0), .b(ab[1][7]), .c(carry_5_7_0), .sum(sum_6_8_0), .cout(carry_6_8_0) );
csa i_csa_6_9_0 ( .a(sum_3_9_0), .b(carry_3_8_0), .c(carry_4_8_0), .sum(sum_6_9_0), .cout(carry_6_9_0) );
csa i_csa_6_10_0( .a(sum_3_10_0), .b(carry_3_9_0), .c(ab[3][7]), .sum(sum_6_10_0), .cout(carry_6_10_0) );
csa i_csa_6_11_0( .a(sum_3_11_0), .b(carry_3_10_0), .c(ab[4][7]), .sum(sum_6_11_0), .cout(carry_6_11_0) );
csa i_csa_6_12_0( .a(sum_1_12_0), .b(carry_1_11_0), .c(carry_3_11_0), .sum(sum_6_12_0), .cout(carry_6_12_0) );
csa i_csa_6_13_0( .a(ab[6][7]), .b(ab[7][6]), .c(carry_1_12_0), .sum(sum_6_13_0), .cout(carry_6_13_0) );

wire [N-1:0] ripple_sum_out;
wire ripple_cout;

// Stage 2 registers the tree outputs; stage 3 assembles the product from
// the ripple-carry result (upper bits) and the already-final low bits.
always @(posedge clk or posedge reset) begin
    if (reset) begin
        wallace_out_ff <= 'b0;
        wallace_a_ff <= 'b0;
        wallace_b_ff <= 'b0;
        mult_ff <= 'b0;
    end else begin
        wallace_out_ff <= wallace_out;
        // bit k of both operands has weight 2^(k+7)
        wallace_a_ff <= {ab[7][7], sum_6_13_0, sum_6_12_0, sum_6_11_0, sum_6_10_0, sum_6_9_0, sum_6_8_0, sum_6_7_0};
        wallace_b_ff <= {carry_6_13_0, carry_6_12_0, carry_6_11_0, carry_6_10_0, carry_6_9_0, carry_6_8_0, carry_6_7_0, carry_6_6_0};
        mult_ff <= {ripple_cout, ripple_sum_out, wallace_out_ff};
    end
end

ripple_carry #(
    .N(N)
) i_ripple_carry (
    .a(wallace_a_ff),
    .b(wallace_b_ff),
    .sum(ripple_sum_out),
    .cout(ripple_cout)
);

assign mult = mult_ff;

endmodule

// Half adder: sum = a XOR b, cout = a AND b.
module ha (
    input a,
    input b,
    output sum,
    output cout
);

assign sum = a^b;
assign cout = a&b;

endmodule

// 3:2 carry-save adder (full adder): sum is the XOR of the three inputs,
// cout is their majority.
module csa (
    input a,
    input b,
    input c,
    output sum,
    output cout
);

assign sum = a^b^c;
assign cout = (a&b)|(a&c)|(b&c);

endmodule

// N-bit ripple-carry adder without carry-in: a half adder for bit 0
// followed by N-1 chained full adders.
module ripple_carry #(
    parameter N=8
) (
    input [N-1:0] a,
    input [N-1:0] b,

    output [N-1:0] sum,
    output cout
);

// cout_int[k] is the carry into bit k
wire [N:1] cout_int;

assign cout = cout_int[N];

ha i_ha (
    .a(a[0]),
    .b(b[0]),
    .sum(sum[0]),
    .cout(cout_int[1])
);

genvar i;
generate
    for (i=1; i<N; i=i+1) begin : gen_fa
        csa i_csa (
            .a(a[i]),
            .b(b[i]),
            .c(cout_int[i]),

            .sum(sum[i]),
            .cout(cout_int[i+1])
        );
    end
endgenerate

endmodule

which produces the following output:

We can synthesize the design and determine Fmax using the makefile and qsf files from here. We determined that Fmax is 261.92 MHz. The design uses 16 LABs and 163 LEs.
For the $12×12$ bit multiplier we also use the scheme above:

// synthesis translate_off
// Self-checking testbench for the 12x12 Wallace tree multiplier.
// A new random operand pair is applied every 4 clocks (2-bit counter wrap);
// at that moment `mult` is compared against the product of the operands
// that have been held for the previous 4 clocks.
module tb;

reg clk;
reg reset;

// 100 MHz clk
always begin
    clk = 1'b0;
    #5;
    clk = 1'b1;
    #5;
end

// hold reset for 10 rising clock edges
initial begin
    reset = 1'b1;
    repeat (10) @(posedge clk);
    reset = 1'b0;
end

reg [11:0] a, b;
reg [1:0] counter;
wire [23:0] mult;

always @(posedge clk or posedge reset) begin
    if (reset) begin
        a <= 'b0;
        b <= 'b0;
        counter <= 'b0;
    end else begin
        counter <= counter+1;
        if (counter==0) begin
            // non-blocking: the compare below still sees the previous a/b
            a <= $random;
            b <= $random;
            // operands are zero-extended to 24 bits before multiplying
            if (mult != ({12'b0,a}*{12'b0,b})) begin
                $display("Exp 0x%x\ngot 0x%x", ({12'b0,a}*{12'b0,b}), mult);
                $stop;
            end
        end
    end
end

wallace_mult_12bit i_wallace_mult(
.clk(clk),
.reset(reset),
.a(a),
.b(b),
.mult(mult)
);

endmodule
// synthesis translate_on

module wallace_mult_12bit #(
parameter N=12
) (
input clk,
input reset,

input [N-1:0] a,
input [N-1:0] b,

output [2*N-1:0] mult
);

reg [N-1:0] a_ff;
reg [N-1:0] b_ff;

// bus containing P(0),...P(N-2)
wire[N-2:0] wallace_out;
reg [N-2:0] wallace_out_ff;
// ffs to the ripple carry adder
reg [N-1:0] wallace_a_ff;
reg [N-1:0] wallace_b_ff;
// ffs output
reg [2*N-1:0] mult_ff;

// register the inputs for timing
// Asynchronous active-high reset clears both operand registers; otherwise
// a and b are captured on every rising clock edge.
always @(posedge clk or posedge reset) begin
    if (reset) begin
        // 'b0 is an unsized zero literal (the bare identifier b0 is undeclared)
        a_ff <= 'b0;
        b_ff <= 'b0;
    end else begin
        a_ff <= a;
        b_ff <= b;
    end
end

wire [N-1:0] ab [0:N-1]; // column index is a bus [column] ab [row]

genvar i;
generate
for (i=0; i<N; i=i+1) begin : gen_and
assign ab[i]=b_ff&{N{a_ff[i]}};
end
endgenerate

assign wallace_out[0] = ab[0][0];

// Level 1
// notation is ha/csa_(level)_(column)_(item)
ha i_ha_1_1_0 (
.a(ab[1][0]),
.b(ab[0][1]),
.sum(wallace_out[1]),
.cout(carry_1_1_0)
);

csa i_csa_1_2_0 (
.a(ab[2][0]),
.b(ab[1][1]),
.c(ab[0][2]),
.sum(sum_1_2_0),
.cout(carry_1_2_0)
);

csa i_csa_1_3_0 (
.a(ab[3][0]),
.b(ab[2][1]),
.c(ab[1][2]),
.sum(sum_1_3_0),
.cout(carry_1_3_0)
);

csa i_csa_1_4_0 (
.a(ab[4][0]),
.b(ab[3][1]),
.c(ab[2][2]),
.sum(sum_1_4_0),
.cout(carry_1_4_0)
);

csa i_csa_1_5_0 (
.a(ab[5][0]),
.b(ab[4][1]),
.c(ab[3][2]),
.sum(sum_1_5_0),
.cout(carry_1_5_0)
);
csa i_csa_1_5_1 (
.a(ab[2][3]),
.b(ab[1][4]),
.c(ab[0][5]),
.sum(sum_1_5_1),
.cout(carry_1_5_1)
);

csa i_csa_1_6_0 (
.a(ab[6][0]),
.b(ab[5][1]),
.c(ab[4][2]),
.sum(sum_1_6_0),
.cout(carry_1_6_0)
);
csa i_csa_1_6_1 (
.a(ab[3][3]),
.b(ab[2][4]),
.c(ab[1][5]),
.sum(sum_1_6_1),
.cout(carry_1_6_1)
);

csa i_csa_1_7_0 (
.a(ab[7][0]),
.b(ab[6][1]),
.c(ab[5][2]),
.sum(sum_1_7_0),
.cout(carry_1_7_0)
);
csa i_csa_1_7_1 (
.a(ab[4][3]),
.b(ab[3][4]),
.c(ab[2][5]),
.sum(sum_1_7_1),
.cout(carry_1_7_1)
);

csa i_csa_1_8_0 (
.a(ab[8][0]),
.b(ab[7][1]),
.c(ab[6][2]),
.sum(sum_1_8_0),
.cout(carry_1_8_0)
);
csa i_csa_1_8_1 (
.a(ab[5][3]),
.b(ab[4][4]),
.c(ab[3][5]),
.sum(sum_1_8_1),
.cout(carry_1_8_1)
);
csa i_csa_1_8_2 (
.a(ab[2][6]),
.b(ab[1][7]),
.c(ab[0][8]),
.sum(sum_1_8_2),
.cout(carry_1_8_2)
);

csa i_csa_1_9_0 (
.a(ab[9][0]),
.b(ab[8][1]),
.c(ab[7][2]),
.sum(sum_1_9_0),
.cout(carry_1_9_0)
);
csa i_csa_1_9_1 (
.a(ab[6][3]),
.b(ab[5][4]),
.c(ab[4][5]),
.sum(sum_1_9_1),
.cout(carry_1_9_1)
);
csa i_csa_1_9_2 (
.a(ab[3][6]),
.b(ab[2][7]),
.c(ab[1][8]),
.sum(sum_1_9_2),
.cout(carry_1_9_2)
);

csa i_csa_1_10_0 (
.a(ab[10][0]),
.b(ab[9][1]),
.c(ab[8][2]),
.sum(sum_1_10_0),
.cout(carry_1_10_0)
);
csa i_csa_1_10_1 (
.a(ab[7][3]),
.b(ab[6][4]),
.c(ab[5][5]),
.sum(sum_1_10_1),
.cout(carry_1_10_1)
);
csa i_csa_1_10_2 (
.a(ab[4][6]),
.b(ab[3][7]),
.c(ab[2][8]),
.sum(sum_1_10_2),
.cout(carry_1_10_2)
);

csa i_csa_1_11_0 (
.a(ab[11][0]),
.b(ab[10][1]),
.c(ab[9][2]),
.sum(sum_1_11_0),
.cout(carry_1_11_0)
);
csa i_csa_1_11_1 (
.a(ab[8][3]),
.b(ab[7][4]),
.c(ab[6][5]),
.sum(sum_1_11_1),
.cout(carry_1_11_1)
);
csa i_csa_1_11_2 (
.a(ab[5][6]),
.b(ab[4][7]),
.c(ab[3][8]),
.sum(sum_1_11_2),
.cout(carry_1_11_2)
);
csa i_csa_1_11_3 (
.a(ab[2][9]),
.b(ab[1][10]),
.c(ab[0][11]),
.sum(sum_1_11_3),
.cout(carry_1_11_3)
);

csa i_csa_1_12_0 (
.a(ab[11][1]),
.b(ab[10][2]),
.c(ab[9][3]),
.sum(sum_1_12_0),
.cout(carry_1_12_0)
);
csa i_csa_1_12_1 (
.a(ab[8][4]),
.b(ab[7][5]),
.c(ab[6][6]),
.sum(sum_1_12_1),
.cout(carry_1_12_1)
);
csa i_csa_1_12_2 (
.a(ab[5][7]),
.b(ab[4][8]),
.c(ab[3][9]),
.sum(sum_1_12_2),
.cout(carry_1_12_2)
);

csa i_csa_1_13_0 (
.a(ab[11][2]),
.b(ab[10][3]),
.c(ab[9][4]),
.sum(sum_1_13_0),
.cout(carry_1_13_0)
);
csa i_csa_1_13_1 (
.a(ab[8][5]),
.b(ab[7][6]),
.c(ab[6][7]),
.sum(sum_1_13_1),
.cout(carry_1_13_1)
);
csa i_csa_1_13_2 (
.a(ab[5][8]),
.b(ab[4][9]),
.c(ab[3][10]),
.sum(sum_1_13_2),
.cout(carry_1_13_2)
);

csa i_csa_1_14_0 (
.a(ab[11][3]),
.b(ab[10][4]),
.c(ab[9][5]),
.sum(sum_1_14_0),
.cout(carry_1_14_0)
);
csa i_csa_1_14_1 (
.a(ab[8][6]),
.b(ab[7][7]),
.c(ab[6][8]),
.sum(sum_1_14_1),
.cout(carry_1_14_1)
);
csa i_csa_1_14_2 (
.a(ab[5][9]),
.b(ab[4][10]),
.c(ab[3][11]),
.sum(sum_1_14_2),
.cout(carry_1_14_2)
);

csa i_csa_1_15_0 (
.a(ab[11][4]),
.b(ab[10][5]),
.c(ab[9][6]),
.sum(sum_1_15_0),
.cout(carry_1_15_0)
);
csa i_csa_1_15_1 (
.a(ab[8][7]),
.b(ab[7][8]),
.c(ab[6][9]),
.sum(sum_1_15_1),
.cout(carry_1_15_1)
);

csa i_csa_1_16_0 (
.a(ab[11][5]),
.b(ab[10][6]),
.c(ab[9][7]),
.sum(sum_1_16_0),
.cout(carry_1_16_0)
);
csa i_csa_1_16_1 (
.a(ab[8][8]),
.b(ab[7][9]),
.c(ab[6][10]),
.sum(sum_1_16_1),
.cout(carry_1_16_1)
);

csa i_csa_1_17_0 (
.a(ab[11][6]),
.b(ab[10][7]),
.c(ab[9][8]),
.sum(sum_1_17_0),
.cout(carry_1_17_0)
);
csa i_csa_1_17_1 (
.a(ab[8][9]),
.b(ab[7][10]),
.c(ab[6][11]),
.sum(sum_1_17_1),
.cout(carry_1_17_1)
);

csa i_csa_1_18_0 (
.a(ab[11][7]),
.b(ab[10][8]),
.c(ab[9][9]),
.sum(sum_1_18_0),
.cout(carry_1_18_0)
);

csa i_csa_1_19_0 (
.a(ab[11][8]),
.b(ab[10][9]),
.c(ab[9][10]),
.sum(sum_1_19_0),
.cout(carry_1_19_0)
);

csa i_csa_1_20_0 (
.a(ab[11][9]),
.b(ab[10][10]),
.c(ab[9][11]),
.sum(sum_1_20_0),
.cout(carry_1_20_0)
);

// Level 2
// notation is ha/csa_(level)_(column)_(item)
ha i_ha_2_2_0 (
.a(sum_1_2_0),
.b(carry_1_1_0),
.sum(wallace_out[2]),
.cout(carry_2_2_0)
);

csa i_csa_2_3_0 (
.a(ab[0][3]),
.b(carry_1_2_0),
.c(sum_1_3_0),
.sum(sum_2_3_0),
.cout(carry_2_3_0)
);

csa i_csa_2_4_0 (
.a(ab[1][3]),
.b(carry_1_3_0),
.c(sum_1_4_0),
.sum(sum_2_4_0),
.cout(carry_2_4_0)
);

csa i_csa_2_5_0 (
.a(carry_1_4_0),
.b(sum_1_5_0),
.c(sum_1_5_1),
.sum(sum_2_5_0),
.cout(carry_2_5_0)
);

csa i_csa_2_6_0 (
.a(carry_1_5_0),
.b(sum_1_6_0),
.c(sum_1_6_1),
.sum(sum_2_6_0),
.cout(carry_2_6_0)
);

csa i_csa_2_7_0 (
.a(sum_1_7_0),
.b(sum_1_7_1),
.c(carry_1_6_0),
.sum(sum_2_7_0),
.cout(carry_2_7_0)
);

csa i_csa_2_7_1 (
.a(carry_1_6_1),
.b(ab[1][6]),
.c(ab[0][7]),
.sum(sum_2_7_1),
.cout(carry_2_7_1)
);

csa i_csa_2_8_0 (
.a(sum_1_8_0),
.b(sum_1_8_1),
.c(sum_1_8_2),
.sum(sum_2_8_0),
.cout(carry_2_8_0)
);

csa i_csa_2_9_0 (
.a(sum_1_9_0),
.b(sum_1_9_1),
.c(sum_1_9_2),
.sum(sum_2_9_0),
.cout(carry_2_9_0)
);
csa i_csa_2_9_1 (
.a(carry_1_8_0),
.b(carry_1_8_1),
.c(carry_1_8_2),
.sum(sum_2_9_1),
.cout(carry_2_9_1)
);

csa i_csa_2_10_0 (
.a(sum_1_10_0),
.b(sum_1_10_1),
.c(sum_1_10_2),
.sum(sum_2_10_0),
.cout(carry_2_10_0)
);
csa i_csa_2_10_1 (
.a(carry_1_9_0),
.b(carry_1_9_1),
.c(carry_1_9_2),
.sum(sum_2_10_1),
.cout(carry_2_10_1)
);

csa i_csa_2_11_0 (
.a(sum_1_11_0),
.b(sum_1_11_1),
.c(sum_1_11_2),
.sum(sum_2_11_0),
.cout(carry_2_11_0)
);
csa i_csa_2_11_1 (
.a(sum_1_11_3),
.b(carry_1_10_0),
.c(carry_1_10_1),
.sum(sum_2_11_1),
.cout(carry_2_11_1)
);

csa i_csa_2_12_0 (
.a(sum_1_12_0),
.b(sum_1_12_1),
.c(sum_1_12_2),
.sum(sum_2_12_0),
.cout(carry_2_12_0)
);
csa i_csa_2_12_1 (
.a(carry_1_11_0),
.b(carry_1_11_1),
.c(carry_1_11_2),
.sum(sum_2_12_1),
.cout(carry_2_12_1)
);
csa i_csa_2_12_2 (
.a(carry_1_11_3),
.b(ab[2][10]),
.c(ab[1][11]),
.sum(sum_2_12_2),
.cout(carry_2_12_2)
);

csa i_csa_2_13_0 (
.a(sum_1_13_0),
.b(sum_1_13_1),
.c(sum_1_13_2),
.sum(sum_2_13_0),
.cout(carry_2_13_0)
);
csa i_csa_2_13_1 (
.a(carry_1_12_0),
.b(carry_1_12_1),
.c(carry_1_12_2),
.sum(sum_2_13_1),
.cout(carry_2_13_1)
);

csa i_csa_2_14_0 (
.a(sum_1_14_0),
.b(sum_1_14_1),
.c(sum_1_14_2),
.sum(sum_2_14_0),
.cout(carry_2_14_0)
);
csa i_csa_2_14_1 (
.a(carry_1_13_0),
.b(carry_1_13_1),
.c(carry_1_13_2),
.sum(sum_2_14_1),
.cout(carry_2_14_1)
);

csa i_csa_2_15_0 (
.a(sum_1_15_0),
.b(sum_1_15_1),
.c(carry_1_14_0),
.sum(sum_2_15_0),
.cout(carry_2_15_0)
);
csa i_csa_2_15_1 (
.a(carry_1_14_1),
.b(carry_1_14_2),
.c(ab[5][10]),
.sum(sum_2_15_1),
.cout(carry_2_15_1)
);

csa i_csa_2_16_0 (
.a(sum_1_16_0),
.b(sum_1_16_1),
.c(carry_1_15_0),
.sum(sum_2_16_0),
.cout(carry_2_16_0)
);

csa i_csa_2_17_0 (
.a(sum_1_17_0),
.b(sum_1_17_1),
.c(carry_1_16_0),
.sum(sum_2_17_0),
.cout(carry_2_17_0)
);

csa i_csa_2_18_0 (
.a(sum_1_18_0),
.b(carry_1_17_0),
.c(carry_1_17_1),
.sum(sum_2_18_0),
.cout(carry_2_18_0)
);

csa i_csa_2_19_0 (
.a(sum_1_19_0),
.b(carry_1_18_0),
.c(ab[8][11]),
.sum(sum_2_19_0),
.cout(carry_2_19_0)
);

// Level 3
// notation is ha/csa_(level)_(column)_(item)
ha i_ha_3_3_0 (
.a(sum_2_3_0),
.b(carry_2_2_0),
.sum(wallace_out[3]),
.cout(carry_3_3_0)
);

csa i_csa_3_4_0 (
.a(ab[0][4]),
.b(sum_2_4_0),
.c(carry_2_3_0),
.sum(sum_3_4_0),
.cout(carry_3_4_0)
);

csa i_csa_3_6_0 (
.a(sum_2_6_0),
.b(carry_1_5_1),
.c(carry_2_5_0),
.sum(sum_3_6_0),
.cout(carry_3_6_0)
);

csa i_csa_3_7_0 (
.a(sum_2_7_0),
.b(sum_2_7_1),
.c(carry_2_6_0),
.sum(sum_3_7_0),
.cout(carry_3_7_0)
);

csa i_csa_3_8_0 (
.a(sum_2_8_0),
.b(carry_1_7_0),
.c(carry_1_7_1),
.sum(sum_3_8_0),
.cout(carry_3_8_0)
);

csa i_csa_3_9_0 (
.a(sum_2_9_0),
.b(sum_2_9_1),
.c(carry_2_8_0),
.sum(sum_3_9_0),
.cout(carry_3_9_0)
);

csa i_csa_3_10_0 (
.a(sum_2_10_0),
.b(sum_2_10_1),
.c(carry_2_9_0),
.sum(sum_3_10_0),
.cout(carry_3_10_0)
);
csa i_csa_3_10_1 (
.a(carry_2_9_1),
.b(ab[0][10]),
.c(ab[1][9]),
.sum(sum_3_10_1),
.cout(carry_3_10_1)
);

csa i_csa_3_11_0 (
.a(sum_2_11_0),
.b(sum_2_11_1),
.c(carry_1_10_2),
.sum(sum_3_11_0),
.cout(carry_3_11_0)
);

csa i_csa_3_12_0 (
.a(sum_2_12_0),
.b(sum_2_12_1),
.c(sum_2_12_2),
.sum(sum_3_12_0),
.cout(carry_3_12_0)
);

csa i_csa_3_13_0 (
.a(sum_2_13_0),
.b(sum_2_13_1),
.c(carry_2_12_0),
.sum(sum_3_13_0),
.cout(carry_3_13_0)
);
csa i_csa_3_13_1 (
.a(carry_2_12_1),
.b(carry_2_12_2),
.c(ab[2][11]),
.sum(sum_3_13_1),
.cout(carry_3_13_1)
);

csa i_csa_3_14_0 (
.a(sum_2_14_0),
.b(sum_2_14_1),
.c(carry_2_13_0),
.sum(sum_3_14_0),
.cout(carry_3_14_0)
);

csa i_csa_3_15_0 (
.a(sum_2_15_0),
.b(sum_2_15_1),
.c(carry_2_14_0),
.sum(sum_3_15_0),
.cout(carry_3_15_0)
);

csa i_csa_3_16_0 (
.a(sum_2_16_0),
.b(carry_1_15_1),
.c(carry_2_15_0),
.sum(sum_3_16_0),
.cout(carry_3_16_0)
);

csa i_csa_3_17_0 (
.a(sum_2_17_0),
.b(carry_2_16_0),
.c(carry_1_16_1),
.sum(sum_3_17_0),
.cout(carry_3_17_0)
);

csa i_csa_3_18_0 (
.a(sum_2_18_0),
.b(carry_2_17_0),
.c(ab[7][11]),
.sum(sum_3_18_0),
.cout(carry_3_18_0)
);

// Level 4
// notation is ha/csa_(level)_(column)_(item)
ha i_ha_4_4_0(
.a(sum_3_4_0),
.b(carry_3_3_0),
.sum(wallace_out[4]),
.cout(carry_4_4_0)
);

csa i_csa_4_5_0(
.a(sum_2_5_0),
.b(carry_2_4_0),
.c(carry_3_4_0),
.sum(sum_4_5_0),
.cout(carry_4_5_0)
);

csa i_csa_4_8_0(
.a(sum_3_8_0),
.b(carry_2_7_0),
.c(carry_2_7_1),
.sum(sum_4_8_0),
.cout(carry_4_8_0)
);

csa i_csa_4_11_0(
.a(sum_3_11_0),
.b(carry_2_10_0),
.c(carry_2_10_1),
.sum(sum_4_11_0),
.cout(carry_4_11_0)
);

csa i_csa_4_12_0(
.a(sum_3_12_0),
.b(carry_2_11_0),
.c(carry_2_11_1),
.sum(sum_4_12_0),
.cout(carry_4_12_0)
);

csa i_csa_4_14_0(
.a(sum_3_14_0),
.b(carry_2_13_1),
.c(carry_3_13_0),
.sum(sum_4_14_0),
.cout(carry_4_14_0)
);

csa i_csa_4_15_0(
.a(sum_3_15_0),
.b(carry_2_14_1),
.c(carry_3_14_0),
.sum(sum_4_15_0),
.cout(carry_4_15_0)
);

csa i_csa_4_16_0(
.a(sum_3_16_0),
.b(carry_2_15_1),
.c(carry_3_15_0),
.sum(sum_4_16_0),
.cout(carry_4_16_0)
);

// Level 5
// notation is ha/csa_(level)_(column)_(item)
ha i_ha_5_5_0(
.a(sum_4_5_0),
.b(carry_4_4_0),
.sum(wallace_out[5]),
.cout(carry_5_5_0)
);

csa i_csa_5_6_0(
.a(sum_3_6_0),
.b(ab[0][6]),
.c(carry_4_5_0),
.sum(sum_5_6_0),
.cout(carry_5_6_0)
);

ha i_ha_5_7_0(
.a(sum_3_7_0),
.b(carry_3_6_0),
.sum(sum_5_7_0),
.cout(carry_5_7_0)
);

ha i_ha_5_8_0(
.a(sum_4_8_0),
.b(carry_3_7_0),
.sum(sum_5_8_0),
.cout(carry_5_8_0)
);

csa i_csa_5_9_0(
.a(sum_3_9_0),
.b(carry_3_8_0),
.c(carry_4_8_0),
.sum(sum_5_9_0),
.cout(carry_5_9_0)
);

csa i_csa_5_10_0(
.a(sum_3_10_0),
.b(sum_3_10_1),
.c(carry_3_9_0),
.sum(sum_5_10_0),
.cout(carry_5_10_0)
);

csa i_csa_5_11_0(
.a(sum_4_11_0),
.b(carry_3_10_0),
.c(carry_3_10_1),
.sum(sum_5_11_0),
.cout(carry_5_11_0)
);

csa i_csa_5_12_0(
.a(sum_4_12_0),
.b(carry_3_11_0),
.c(carry_4_11_0),
.sum(sum_5_12_0),
.cout(carry_5_12_0)
);

csa i_csa_5_13_0(
.a(sum_3_13_0),
.b(sum_3_13_1),
.c(carry_3_12_0),
.sum(sum_5_13_0),
.cout(carry_5_13_0)
);

// Level 6
// notation is ha/csa_(level)_(column)_(item)
ha i_ha_6_6_0(
.a(sum_5_6_0),
.b(carry_5_5_0),
.sum(wallace_out[6]),
.cout(carry_6_6_0)
);

ha i_ha_6_7_0(
.a(sum_5_7_0),
.b(carry_5_6_0),
.sum(sum_6_7_0),
.cout(carry_6_7_0)
);

ha i_ha_6_8_0(
.a(sum_5_8_0),
.b(carry_5_7_0),
.sum(sum_6_8_0),
.cout(carry_6_8_0)
);

csa i_csa_6_9_0(
.a(sum_5_9_0),
.b(ab[0][9]),
.c(carry_5_8_0),
.sum(sum_6_9_0),
.cout(carry_6_9_0)
);

ha i_ha_6_10_0 (
.a(sum_5_10_0),
.b(carry_5_9_0),
.sum(sum_6_10_0),
.cout(carry_6_10_0)
);

ha i_ha_6_11_0 (
.a(sum_5_11_0),
.b(carry_5_10_0),
.sum(sum_6_11_0),
.cout(carry_6_11_0)
);

// Level 7
// notation is ha/csa_(level)_(column)_(item)
ha i_ha_7_7_0(
.a(sum_6_7_0),
.b(carry_6_6_0),
.sum(wallace_out[7]),
.cout(carry_7_7_0)
);

ha i_ha_7_8_0(
.a(sum_6_8_0),
.b(carry_6_7_0),
.sum(sum_7_8_0),
.cout(carry_7_8_0)
);

ha i_ha_7_9_0(
.a(sum_6_9_0),
.b(carry_6_8_0),
.sum(sum_7_9_0),
.cout(carry_7_9_0)
);

ha i_ha_7_10_0(
.a(sum_6_10_0),
.b(carry_6_9_0),
.sum(sum_7_10_0),
.cout(carry_7_10_0)
);

// Level 8
// notation is ha/csa_(level)_(column)_(item)
ha i_ha_8_8_0(
.a(sum_7_8_0),
.b(carry_7_7_0),
.sum(wallace_out[8]),
.cout(carry_8_8_0)
);

// Level 9
// notation is ha/csa_(level)_(column)_(item)
csa i_csa_9_9_0 (
.a(sum_7_9_0),
.b(carry_7_8_0),
.c(carry_8_8_0),
.sum(wallace_out[9]),
.cout(carry_9_9_0)
);

// Level 10
// notation is ha/csa_(level)_(column)_(item)
csa i_csa_10_10_0 (
.a(sum_7_10_0),
.b(carry_7_9_0),
.c(carry_9_9_0),
.sum(wallace_out[10]),
.cout(carry_10_10_0)
);
csa i_csa_10_11_0 (
.a(sum_6_11_0),
.b(carry_6_10_0),
.c(carry_7_10_0),
.sum(sum_10_11_0),
.cout(carry_10_11_0)
);
csa i_csa_10_12_0 (
.a(sum_5_12_0),
.b(carry_5_11_0),
.c(carry_6_11_0),
.sum(sum_10_12_0),
.cout(carry_10_12_0)
);
csa i_csa_10_13_0 (
.a(sum_5_13_0),
.b(carry_5_12_0),
.c(carry_4_12_0),
.sum(sum_10_13_0),
.cout(carry_10_13_0)
);
csa i_csa_10_14_0 (
.a(sum_4_14_0),
.b(carry_3_13_1),
.c(carry_5_13_0),
.sum(sum_10_14_0),
.cout(carry_10_14_0)
);
csa i_csa_10_15_0 (
.a(sum_4_15_0),
.b(carry_4_14_0),
.c(ab[4][11]),
.sum(sum_10_15_0),
.cout(carry_10_15_0)
);
csa i_csa_10_16_0 (
.a(sum_4_16_0),
.b(carry_4_15_0),
.c(ab[5][11]),
.sum(sum_10_16_0),
.cout(carry_10_16_0)
);
csa i_csa_10_17_0 (
.a(sum_3_17_0),
.b(carry_3_16_0),
.c(carry_4_16_0),
.sum(sum_10_17_0),
.cout(carry_10_17_0)
);
csa i_csa_10_18_0 (
.a(sum_3_18_0),
.b(carry_3_17_0),
.c(ab[8][10]),
.sum(sum_10_18_0),
.cout(carry_10_18_0)
);
csa i_csa_10_19_0 (
.a(sum_2_19_0),
.b(carry_2_18_0),
.c(carry_3_18_0),
.sum(sum_10_19_0),
.cout(carry_10_19_0)
);
csa i_csa_10_20_0 (
.a(sum_1_20_0),
.b(carry_1_19_0),
.c(carry_2_19_0),
.sum(sum_10_20_0),
.cout(carry_10_20_0)
);
csa i_csa_10_21_0 (
.a(carry_1_20_0),
.b(ab[10][11]),
.c(ab[11][10]),
.sum(sum_10_21_0),
.cout(carry_10_21_0)
);

wire [N-1:0] ripple_sum_out;
wire ripple_cout;

// Pipeline registers after the Wallace tree and after the ripple-carry adder.
// wallace_out_ff holds the low product bits that the tree already resolved;
// wallace_a_ff / wallace_b_ff hold the two remaining rows that the
// ripple-carry adder sums; mult_ff assembles the final product.
always @(posedge clk or posedge reset) begin
    if (reset) begin
        // 'b0 is an unsized zero literal (the bare identifier b0 is undeclared)
        wallace_out_ff <= 'b0;
        wallace_a_ff <= 'b0;
        wallace_b_ff <= 'b0;
        mult_ff <= 'b0;
    end else begin
        wallace_out_ff <= wallace_out;
        // bit k of both operands has weight 2^(k+11)
        wallace_a_ff <= {ab[11][11], sum_10_21_0, sum_10_20_0, sum_10_19_0, sum_10_18_0, sum_10_17_0, sum_10_16_0, sum_10_15_0, sum_10_14_0, sum_10_13_0, sum_10_12_0, sum_10_11_0};
        wallace_b_ff <= {carry_10_21_0, carry_10_20_0, carry_10_19_0, carry_10_18_0, carry_10_17_0, carry_10_16_0, carry_10_15_0, carry_10_14_0, carry_10_13_0, carry_10_12_0, carry_10_11_0, carry_10_10_0};
        // wallace_out_ff and the adder inputs were registered on the same
        // edge, so the concatenated fields belong to the same operand pair
        mult_ff <= {ripple_cout, ripple_sum_out, wallace_out_ff};
    end
end

ripple_carry #(
.N(N)
) i_ripple_carry (
.a(wallace_a_ff),
.b(wallace_b_ff),
.sum(ripple_sum_out),
.cout(ripple_cout)
);

assign mult = mult_ff;

endmodule

// Half adder built from gate primitives.
//   sum  : a XOR b
//   cout : a AND b
module ha (a, b, sum, cout);

input a;
input b;
output sum;
output cout;

xor g_sum (sum, a, b);
and g_cout (cout, a, b);

endmodule

// 3:2 carry-save adder (full adder) built from gate primitives.
//   sum  : XOR of the three inputs
//   cout : majority of the three inputs
module csa (a, b, c, sum, cout);

input a;
input b;
input c;
output sum;
output cout;

// pairwise AND terms feeding the majority OR
wire both_ab;
wire both_ac;
wire both_bc;

xor g_sum (sum, a, b, c);

and g_ab (both_ab, a, b);
and g_ac (both_ac, a, c);
and g_bc (both_bc, b, c);
or g_cout (cout, both_ab, both_ac, both_bc);

endmodule

// N-bit ripple-carry adder with no carry-in.
// Bit 0 is added with a half adder; bits 1..N-1 each use a full adder whose
// carry input comes from the previous bit. cout is the carry out of bit N-1.
module ripple_carry #(
parameter N=8
) (
input [N-1:0] a,
input [N-1:0] b,

output [N-1:0] sum,
output cout
);

// carry[k] is the carry into bit position k (k = 1..N)
wire [N:1] carry;

// least significant bit: no incoming carry
ha i_ha (
.a(a[0]),
.b(b[0]),
.sum(sum[0]),
.cout(carry[1])
);

// remaining bits: one full adder per position
genvar pos;
generate
for (pos=1; pos<N; pos=pos+1) begin : gen_fa
csa i_csa (
.a(a[pos]),
.b(b[pos]),
.c(carry[pos]),

.sum(sum[pos]),
.cout(carry[pos+1])
);
end
endgenerate

// carry out of the most significant bit
assign cout = carry[N];

endmodule

which produces the following output:

We can synthesize the design and determine Fmax. We determined that Fmax is 198.1 MHz. The design uses 29 LABs and 379 LEs.