prjunnamed_memory/
lib.rs

//! This library contains various common utilities for writing target-specific memory lowering passes.
//!
//! Target-specific memory lowering passes are organized roughly as follows (a condensed sketch in code
//! follows this list):
//!
//! - for each memory in the design:
//!   - a list of candidate lowerings is assembled:
//!     - if a lowering mode (ff / lutram / blockram / hugeram) is explicitly selected, only the relevant
//!       target cell types are considered
//!     - for each available type of target cell:
//!       - hard requirements are checked, and the target cell is rejected if they are unmet, e.g.:
//!         - memories with more write ports than the target cell supports are rejected
//!         - memories with asynchronous read ports are rejected if not supported by the target cell
//!       - a set of candidate "swizzle" transformations is assembled, for rearranging the memory geometry
//!         to match the target cell
//!         - the candidate swizzles are created according to target-specific rules, with some help from
//!           common functions for e.g. legalizing write mask granularity
//!       - the best swizzle is selected, according to some metric (such as inserted soft mux depth or number
//!         of used target cells)
//!       - when a memory needs to be split/duplicated because of excessive read port count, each of the
//!         split/duplicated sub-memories will have its own swizzle
//!       - if a swizzle is egregiously bad (uses more resources than the entire FPGA has available), it is
//!         rejected
//!     - if applicable, a "fallback" lowering candidate is inserted into the list of candidates
//!       - no fallback lowering is available if multiple write-port clock domains are present
//!       - lowering to FFs for RAMs (very expensive)
//!       - lowering to LUTs for ROMs (somewhat expensive)
//!   - if no candidates are available, synthesis is aborted with a fatal error
//!   - one of the candidate lowerings is picked, according to some target-specific heuristic
//!     - initial heuristic will simply switch between target cells based on memory depth
//!     - in the future, we will want to take target resource counts into account (moving the depth threshold
//!       to avoid overfilling the FPGA)
//!   - if the "fallback" lowering candidate has been selected, the common lowering code is called to perform it
//!   - otherwise, target cell lowering is performed:
//!     - the memory is split into sub-memories (using common code), as required to satisfy max read port count,
//!       then each sub-memory is processed independently
//!     - various fixup transformations (implemented in common code, but driven by target code) are performed on
//!       the memory to match it with target cell capabilities:
//!       - sync read ports are converted to async if required
//!       - read port enable / clear / reset / initial value are unmapped and emulated if not supported
//!       - transparency is unmapped and emulated if not supported
//!       - read-first relations are emulated if not supported
//!     - the swizzle (computed earlier) is applied to the memory (using common code), aligning its geometry
//!       with the target cell
//!     - the memory is consumed and converted into actual target cells
//!     - any target-specific post-processing (such as merging output pipeline registers) is performed
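//!
//! A condensed sketch of such a pass's driver loop, tying the flow above to the API in this crate
//! (the `Candidate` type and the `target_*` functions are illustrative placeholders for
//! target-specific code, not a real API):
//!
//! ```ignore
//! for (memory, output) in memories {
//!     let mut candidates = target_cell_candidates(design, &memory);
//!     if memory.can_lower_fallback() {
//!         candidates.push(Candidate::Fallback);
//!     }
//!     match target_pick_best(candidates) {
//!         None => panic!("memory cannot be lowered to this target"),
//!         Some(Candidate::Fallback) => memory.lower_fallback(design, &output),
//!         Some(Candidate::TargetCell(swizzle)) => {
//!             for (sub_memory, sub_output) in memory.swizzle(design, &output, &swizzle) {
//!                 target_emit_cells(design, sub_memory, sub_output);
//!             }
//!         }
//!     }
//! }
//! ```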

use std::collections::{btree_map, BTreeMap};

use prjunnamed_netlist::{
    Const, ControlNet, Design, FlipFlop, Memory, MemoryPortRelation, MemoryReadFlipFlop, MemoryReadPort,
    MemoryWritePort, Net, Trit, Value,
};

/// Describes a "swizzle" transformation, used to align memory geometry to target requirements.
///
/// This structure is constructed by the target, with some help from common code.
///
/// The swizzle transformation involves the following steps:
///
/// 1. The data swizzle is applied to the write data, write mask, and read data signals.  This is used
///    to insert dummy data bits to ensure that the target cell's write mask granularity requirements are met.
///
/// 2. The address bits selected by `soft_addr_bits_mask` are removed from all ports, and implemented using soft
///    logic.  This has the following effects:
///
///    - memory depth is divided by two (for each removed address bit)
///    - memory width is multiplied by two (for each removed address bit)
///    - for each read port:
///      - if the removed address bit was implicit (as part of a wide port), the port width doesn't change
///      - otherwise, the port width is multiplied by two, and a mux is inserted
///    - for each write port:
///      - if the removed address bit was implicit (as part of a wide port), the port width doesn't change
///      - otherwise, the port width is multiplied by two, and the write mask is adjusted with address decoding logic
///
///    This part of the transformation is used to deal with memories that have excessively asymmetric port widths.
///
/// 3. The `wide_log2` of every write port is adjusted up to the factor requested in `write_wide_log2`, by doubling
///    port width and inserting write address decode logic as necessary.  It is an error if the requested factor
///    is smaller than the current one (after applying step #2).
///
///    This is used for funny cases like iCE40 memories, which require using 16-bit write ports to have per-bit
///    write masks.
///
/// 4. The `wide_log2` of every read port is adjusted up to the factor requested in `read_wide_log2`, by doubling
///    port width and inserting muxes as necessary.  It is an error if the requested factor is smaller than the
///    current one (after applying step #2).
///
///    This is used for obscure cases requiring a *minimum* port width asymmetry, such as the Xilinx `RAM64X8SW` cell.
///
/// 5. The memory depth is adjusted to be at most `2 ** hard_addr_bits`.  Further, all read and write ports are
///    adjusted to have an address signal that is exactly `hard_addr_bits - port.wide_log2()` bits.
///
///    If the memory is deeper than this limit, it is split up into `2 ** hard_addr_bits`-deep chunks,
///    the chunks are assembled "side-by-side" (converting depth to width), and appropriate read multiplexers
///    and write address decoders are inserted.
///
/// 6. The memory is cut along its width, into `data_width_unit`-sized chunks, which are returned as individual
///    [`Memory`] structures.  The last chunk is padded with dummy bits.
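///
/// A minimal sketch (hypothetical geometry, not tied to any real target): a memory whose write
/// mask controls bits in groups of 3 could be padded for a cell with 4-bit mask granularity,
/// `2 ** 10` rows, and a 4-bit data unit like this:
///
/// ```ignore
/// let swizzle = MemorySwizzle {
///     // pad each 3-bit mask group with one dummy bit to reach 4-bit granularity
///     data_swizzle: vec![Some(0), Some(1), Some(2), None],
///     soft_addr_bits_mask: 0,   // no address bits moved into soft logic
///     write_wide_log2: vec![0], // single write port, kept non-wide
///     read_wide_log2: vec![0],  // single read port, kept non-wide
///     hard_addr_bits: 10,       // target cell stores 2 ** 10 rows
///     data_width_unit: 4,       // one 4-bit target cell per chunk
/// };
/// ```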
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemorySwizzle {
    /// The data swizzle.  In the first step, the data width of the memory is adjusted to the length of this
    /// vector.  Each bit of the new memory width is described by an entry of this vector.  A `Some(idx)` means
    /// that the bit should correspond to the given bit of each row of the original memory, while a `None` means
    /// a dummy bit inserted for legalization purposes.
    pub data_swizzle: Vec<Option<usize>>,
    /// The set of address bits to be removed from all ports and implemented in soft logic (step 2).
    pub soft_addr_bits_mask: u32,
    /// The requested `wide_log2` for each write port (step 3).
    pub write_wide_log2: Vec<usize>,
    /// The requested `wide_log2` for each read port (step 4).
    pub read_wide_log2: Vec<usize>,
    /// The number of hard address bits; memory depth is limited to `2 ** hard_addr_bits` (step 5).
    pub hard_addr_bits: usize,
    /// The width of each of the individual memories returned by the final cut (step 6).
    pub data_width_unit: usize,
}

/// An extension trait for `Memory` with assorted memory lowering utility functions.
pub trait MemoryExt {
    /// Returns a new memory, with the same dimensions and write ports as this one, but only the selected
    /// subset of read ports.
    ///
    /// `output` should be the output value of the memory.  This function will slice out the bits corresponding
    /// to the selected read ports, and return them.
    fn extract_read_ports(&self, ports: &[usize], output: &Value) -> (Memory, Value);

    /// Converts a synchronous read port to an asynchronous read port, by effectively extracting a flip-flop.
    /// If transparency relations are involved, soft transparency logic is inserted.
    ///
    /// `output` is the output value of the memory cell.  This function performs a `replace_value` on the bits
    /// corresponding to the converted port, and changes them in-place in the argument to newly-added void nets
    /// that are to be driven by the newly-converted port.  Thus, the original memory cell must be unalived, and
    /// `replace_value` must be called on the new value of `output`, possibly after further lowering.
    ///
    /// If the given read port is already asynchronous, the function will return without doing anything.
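    ///
    /// A typical call sequence (a sketch; it mirrors the use in `lower_fallback` below, with the
    /// surrounding cell replacement elided):
    ///
    /// ```ignore
    /// let mut output = output.clone();
    /// for port_index in 0..memory.read_ports.len() {
    ///     memory.unmap_read_dff(design, port_index, &mut output);
    /// }
    /// // all read ports are now asynchronous; `output` refers to the new void nets
    /// ```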
    fn unmap_read_dff(&mut self, design: &Design, port_index: usize, output: &mut Value);

    /// Unmaps the following features from a synchronous read port:
    ///
    /// - reset
    /// - clear
    /// - initial value
    /// - if `include_transparency` is true, all `MemoryPortRelation::Transparent` relations
    ///
    /// If none of the above are used on the given port, the function will return without doing anything.
    ///
    /// Note that, if any of the above features is unmapped, all of reset, clear, and initial value can be unmapped
    /// for free (it would actually be more complex to *not* unmap them).  Thus, we do not have granular controls
    /// for them.
    ///
    /// This function must not be called on asynchronous read ports.
    fn unmap_read_init_reset_transparency(
        &mut self,
        design: &Design,
        port_index: usize,
        include_transparency: bool,
        output: &mut Value,
    );

    // TODO: needs a `can_emulate_read_before_write()`

    /// Emulates all read-before-write relations in the memory.
    ///
    /// This function has strict preconditions:
    ///
    /// - all read ports must be synchronous and have the same clock
    /// - a single write port cannot have both read-before-write and transparent relations
    ///
    /// If there are no read-before-write relations in the memory, this function will return without doing anything.
    ///
    /// The read-before-write relations are emulated by delaying all write ports with read-before-write relations
    /// by half a cycle:  the clock polarity of the relevant write ports is flipped, and flip-flops (clocked by
    /// the original clock) are added on the data, mask, and address inputs.
    fn emulate_read_before_write(&mut self, design: &Design);

    /// Constructs a `data_swizzle` for the [`MemorySwizzle`] structure, ensuring that the resulting memory
    /// will satisfy mask granularity constraints for all write ports.  The argument must contain one number
    /// for each write port, specifying its mask granularity in bits.  For each write port, the result will
    /// satisfy the following constraints:
    ///
    /// - the result's length will be divisible by the specified granularity
    /// - every aligned group of `granularity` bits will consist only of bits that share the same `mask` net
    ///   in the write port; if the write port is wide, this rule applies individually to all of its subports
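    ///
    /// A worked example (illustrative nets): consider a 3-bit-wide memory whose single non-wide
    /// write port has the mask `{a, b, b}` (bit 0 uses one mask net, bits 1-2 share another).
    /// With a granularity of 2, bit 1 cannot share an aligned group with bit 0, so a dummy bit
    /// is inserted between them:
    ///
    /// ```ignore
    /// assert_eq!(memory.make_data_swizzle(&[2]), vec![Some(0), None, Some(1), Some(2)]);
    /// ```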
    fn make_data_swizzle(&self, write_port_granularity: &[usize]) -> Vec<Option<usize>>;

    /// Returns the depths of the memories that would be returned by `swizzle`, for cost estimation purposes.
    /// Each entry in the result vector corresponds to one memory.
    ///
    /// For the simple case of the target mapping every memory to a single target cell, the length
    /// of the returned vector is equal to the number of target cells used.  For more advanced mappings
    /// where the target can lower a memory to multiple target cells (e.g. where hardware cascade or chip
    /// selects are involved), the target can look at the exact memory depths required and perform its own
    /// computations.
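    ///
    /// A worked example (illustrative numbers): a 3000-deep memory with an 8-entry `data_swizzle`,
    /// `soft_addr_bits_mask: 0`, `hard_addr_bits: 10`, and `data_width_unit: 8` splits into two
    /// full-depth chunks and one partial chunk:
    ///
    /// ```ignore
    /// assert_eq!(memory.swizzle_depths(&swizzle), vec![1024, 1024, 952]);
    /// ```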
    fn swizzle_depths(&self, swizzle: &MemorySwizzle) -> Vec<usize>;

    /// Returns the number of soft-decoded address bits for each write port and read port for a given `swizzle`.
    ///
    /// Assumes that the swizzle applies to a memory where `extract_read_ports` has been called first with
    /// the given `read_ports` argument.
    ///
    /// The first returned vector is the number of soft-decoded address bits for each write port, and the second
    /// is the number of soft-decoded address bits for each read port included in `read_ports`.
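    ///
    /// Continuing the example from `swizzle_depths` (illustrative numbers): with `hard_addr_bits: 10`
    /// and no soft address bits, a 3000-deep memory splits into three chunks, so a port whose
    /// `wide_log2` is unchanged needs 2 soft-decoded address bits to select between them:
    ///
    /// ```ignore
    /// let (write_bits, read_bits) = memory.swizzle_mux_bits(&[0], &swizzle);
    /// assert_eq!(write_bits, vec![2]);
    /// ```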
    fn swizzle_mux_bits(&self, read_ports: &[usize], swizzle: &MemorySwizzle) -> (Vec<usize>, Vec<usize>);

    /// Performs a [`MemorySwizzle`] transformation on a memory, and returns a list of new memories with
    /// the corresponding outputs that need to be driven.  The memories should be further lowered by the target.
    fn swizzle(self, design: &Design, output: &Value, swizzle: &MemorySwizzle) -> Vec<(Memory, Value)>;

    /// Returns true iff the memory is legal for fallback lowering.
    ///
    /// A memory is legal for fallback lowering iff all write ports share the same clock.
    fn can_lower_fallback(&self) -> bool;

    /// Performs fallback lowering of the memory.
    ///
    /// If the memory has write ports, a flip-flop is created for every bit of the memory, with associated
    /// write address decoding logic and read muxes.  It is thus *very* inefficient unless the memory depth
    /// is *very* small.
    ///
    /// If the memory has no write ports, it is lowered to just a tree of muxes for every read port, with
    /// constants at the leaves.  On FPGA targets, this will be lowered to a fairly simple tree of LUTs,
    /// which can be reasonably efficient for memories of small depth.
    fn lower_fallback(self, design: &Design, output: &Value);
}

// creates a transparency emulation mux, to be used by both sync-to-async conversion and transparency emulation.
// the data and mask given must be the same width as the read port.  the result is new data and mask:
// if the write port is writing to the same address as the read port is reading, the write data will be
// multiplexed onto the newly-returned data according to the write mask, and the new mask will be an OR of
// the input mask and the write mask.  otherwise, the returned data and mask are the same as the input.
// the mask can be None if the caller doesn't need to track it.
fn transparency_mux(
    memory: &Memory,
    design: &Design,
    read_port_index: usize,
    write_port_index: usize,
    data: Value,
    mask: Option<Value>,
) -> (Value, Option<Value>) {
    let read_port = &memory.read_ports[read_port_index];
    let write_port = &memory.write_ports[write_port_index];

    // adjust write data/mask to read port width
    let write_wide_log2 = write_port.wide_log2(memory);
    let read_wide_log2 = read_port.wide_log2(memory);
    let (write_addr, read_addr, write_data, write_mask) = match write_wide_log2.cmp(&read_wide_log2) {
        std::cmp::Ordering::Less => {
            // read wider than write: demux write data/mask using lower write addr bits
            let wide_log2_diff = read_wide_log2 - write_wide_log2;
            let select = write_port.addr.slice(..wide_log2_diff);
            let write_addr = write_port.addr.slice(wide_log2_diff..);
            let write_data =
                design.add_shl(write_port.data.zext(read_port.data_len), &select, write_port.data.len() as u32);
            let write_mask =
                design.add_shl(write_port.mask.zext(read_port.data_len), &select, write_port.mask.len() as u32);
            (write_addr, read_port.addr.clone(), write_data, write_mask)
        }
        std::cmp::Ordering::Equal => {
            (write_port.addr.clone(), read_port.addr.clone(), write_port.data.clone(), write_port.mask.clone())
        }
        std::cmp::Ordering::Greater => {
            // write wider than read: select write data/mask slices from the write port using lower read addr bits
            let wide_log2_diff = write_wide_log2 - read_wide_log2;
            let select = read_port.addr.slice(..wide_log2_diff);
            let read_addr = read_port.addr.slice(wide_log2_diff..);
            let write_data =
                design.add_ushr(&write_port.data, &select, read_port.data_len as u32).slice(..read_port.data_len);
            let write_mask =
                design.add_ushr(&write_port.mask, &select, read_port.data_len as u32).slice(..read_port.data_len);
            (write_port.addr.clone(), read_addr, write_data, write_mask)
        }
    };

    // compare the relevant address bits
    let max_addr_width = std::cmp::max(read_addr.len(), write_addr.len());
    let addr_eq = design.add_eq(read_addr.zext(max_addr_width), write_addr.zext(max_addr_width));

    // perform actual muxing
    let sel_write = design.add_dedup_mux(addr_eq, &write_mask, Value::zero(write_mask.len()));
    let new_data = design.add_bitwise_mux(sel_write, write_data, data);
    let new_mask = mask.map(|mask| design.add_dedup_mux(addr_eq, design.add_or(&mask, write_mask), mask));

    (new_data, new_mask)
}

impl MemoryExt for Memory {
    fn extract_read_ports(&self, ports: &[usize], output: &Value) -> (Memory, Value) {
        let mut new_memory = Memory {
            depth: self.depth,
            width: self.width,
            init_value: self.init_value.clone(),
            write_ports: self.write_ports.clone(),
            read_ports: vec![],
        };
        let mut new_output = Value::new();
        for &port_index in ports {
            new_memory.read_ports.push(self.read_ports[port_index].clone());
            let output_slice = self.read_port_output_slice(port_index);
            new_output.extend(output.slice(output_slice));
        }
        (new_memory, new_output)
    }

    fn unmap_read_dff(&mut self, design: &Design, port_index: usize, output: &mut Value) {
        let read_port = &mut self.read_ports[port_index];
        let Some(port_flip_flop) = read_port.flip_flop.take() else {
            return;
        };
        let new_port_output = design.add_void(read_port.data_len);
        let mut data = new_port_output.clone();
        for (write_port_index, relation) in port_flip_flop.relations.into_iter().enumerate() {
            if relation == MemoryPortRelation::Transparent {
                (data, _) = transparency_mux(self, design, port_index, write_port_index, data, None);
            }
        }
        let q = design.add_dff(FlipFlop {
            data,
            clock: port_flip_flop.clock,
            clear: port_flip_flop.clear,
            reset: port_flip_flop.reset,
            enable: port_flip_flop.enable,
            reset_over_enable: port_flip_flop.reset_over_enable,
            clear_value: port_flip_flop.clear_value,
            reset_value: port_flip_flop.reset_value,
            init_value: port_flip_flop.init_value,
        });
        let output_slice = self.read_port_output_slice(port_index);
        design.replace_value(output.slice(output_slice.clone()), q);
        output[output_slice].copy_from_slice(&new_port_output[..]);
    }

    fn unmap_read_init_reset_transparency(
        &mut self,
        design: &Design,
        port_index: usize,
        include_transparency: bool,
        output: &mut Value,
    ) {
        let read_port = &self.read_ports[port_index];
        let port_flip_flop = read_port.flip_flop.as_ref().unwrap();

        let has_init = port_flip_flop.has_init_value();
        let has_reset = port_flip_flop.has_reset();
        let has_clear = port_flip_flop.has_clear();

        // create transparency data and mask values. data is the value that should override the read data,
        // and mask is the validity mask corresponding to it.
        // if not unmapping transparency, just set them to (X, 0).
        let mut data = Value::undef(read_port.data_len);
        let mut mask = Some(Value::zero(read_port.data_len));
        let mut has_transparency = false;
        if include_transparency {
            for (write_port_index, &relation) in port_flip_flop.relations.iter().enumerate() {
                if relation == MemoryPortRelation::Transparent {
                    (data, mask) = transparency_mux(self, design, port_index, write_port_index, data, mask);
                    has_transparency = true;
                }
            }
        }
        let mask = mask.unwrap();

        if !has_init && !has_reset && !has_clear && !has_transparency {
            return;
        }

        // now delay the above by one cycle, and also mix in the unmapped clear/reset/init into it.
        let read_port = &mut self.read_ports[port_index];
        let port_flip_flop = read_port.flip_flop.as_mut().unwrap();
        let data = design.add_dff(FlipFlop {
            data,
            clock: port_flip_flop.clock,
            clear: port_flip_flop.clear,
            reset: port_flip_flop.reset,
            enable: port_flip_flop.enable,
            reset_over_enable: port_flip_flop.reset_over_enable,
            clear_value: std::mem::replace(&mut port_flip_flop.clear_value, Const::undef(read_port.data_len)),
            reset_value: std::mem::replace(&mut port_flip_flop.reset_value, Const::undef(read_port.data_len)),
            init_value: std::mem::replace(&mut port_flip_flop.init_value, Const::undef(read_port.data_len)),
        });
        let mask = design.add_dedup_dff(FlipFlop {
            data: mask,
            clock: port_flip_flop.clock,
            clear: port_flip_flop.clear,
            reset: port_flip_flop.reset,
            enable: port_flip_flop.enable,
            reset_over_enable: port_flip_flop.reset_over_enable,
            clear_value: Const::ones(read_port.data_len),
            reset_value: Const::ones(read_port.data_len),
            init_value: if has_init { Const::ones(read_port.data_len) } else { Const::undef(read_port.data_len) },
        });
        if include_transparency {
            for relation in &mut port_flip_flop.relations {
                if *relation == MemoryPortRelation::Transparent {
                    *relation = MemoryPortRelation::Undefined;
                }
            }
        }
        port_flip_flop.clear = ControlNet::ZERO;
        port_flip_flop.reset = ControlNet::ZERO;

        // perform the actual muxing.
        let new_port_output = design.add_void(read_port.data_len);
        let mux = design.add_bitwise_mux(mask, data, &new_port_output);

        // and do the replacement.
        let output_slice = self.read_port_output_slice(port_index);
        design.replace_value(output.slice(output_slice.clone()), mux);
        output[output_slice].copy_from_slice(&new_port_output[..]);
    }

    fn emulate_read_before_write(&mut self, design: &Design) {
        let mut rdfirst_write_ports = vec![false; self.write_ports.len()];
        let mut transparent_write_ports = vec![false; self.write_ports.len()];

        for port in &self.read_ports {
            let port_flip_flop = port.flip_flop.as_ref().unwrap();
            assert_eq!(
                port_flip_flop.clock,
                self.read_ports[0].flip_flop.as_ref().unwrap().clock,
                "all read ports must have the same clock"
            );
            for (write_port_index, &relation) in port_flip_flop.relations.iter().enumerate() {
                match relation {
                    MemoryPortRelation::Undefined => (),
                    MemoryPortRelation::ReadBeforeWrite => rdfirst_write_ports[write_port_index] = true,
                    MemoryPortRelation::Transparent => transparent_write_ports[write_port_index] = true,
                }
            }
        }

        let init_undef = self.init_value.is_undef();

        for (port_index, port) in self.write_ports.iter_mut().enumerate() {
            if !rdfirst_write_ports[port_index] {
                continue;
            }
            assert!(
                !transparent_write_ports[port_index],
                "a single write port cannot have both a read-before-write and a transparent relation to two read ports"
            );
            port.addr = design.add_dff(FlipFlop::new(std::mem::take(&mut port.addr), port.clock));
            port.data = design.add_dff(FlipFlop::new(std::mem::take(&mut port.data), port.clock));
            port.mask = design.add_dedup_dff(
                FlipFlop::new(std::mem::take(&mut port.mask), port.clock).with_init(if init_undef {
                    Const::undef(port.data.len())
                } else {
                    Const::zero(port.data.len())
                }),
            );
            port.clock = !port.clock;
            for read_port in &mut self.read_ports {
                let read_ff = read_port.flip_flop.as_mut().unwrap();
                read_ff.relations[port_index] = MemoryPortRelation::Undefined;
            }
        }
    }

    fn make_data_swizzle(&self, write_port_granularity: &[usize]) -> Vec<Option<usize>> {
        assert_eq!(self.write_ports.len(), write_port_granularity.len());
        let mask_slices = Vec::from_iter((0..self.width).map(|bit_index| {
            Vec::from_iter(self.write_ports.iter().map(|port| {
                Value::from_iter(
                    (0..(port.data.len() / self.width))
                        .map(|subport_index| port.mask[subport_index * self.width + bit_index]),
                )
            }))
        }));
        let mut result = vec![];
        for bit_index in 0..self.width {
            'check_slices: loop {
                for (port_index, &port_granularity) in write_port_granularity.iter().enumerate() {
                    if result.len() % port_granularity != 0 {
                        if mask_slices[bit_index][port_index] != mask_slices[bit_index - 1][port_index] {
                            result.push(None);
                            continue 'check_slices;
                        }
                    }
                }
                break;
            }
            result.push(Some(bit_index));
        }
        'align: loop {
            for &port_granularity in write_port_granularity {
                if result.len() % port_granularity != 0 {
                    result.push(None);
                    continue 'align;
                }
            }
            break;
        }
        result
    }

    fn swizzle_depths(&self, swizzle: &MemorySwizzle) -> Vec<usize> {
        // compute how many low soft address bits there are
        let soft_addr_bits = swizzle.soft_addr_bits_mask.count_ones() as usize;
        // compute the dimensions of memory after swizzle steps 1-2 applied
        let base_width = swizzle.data_swizzle.len() << soft_addr_bits;
        let base_depth = self.depth >> soft_addr_bits;
        // compute how many chunks created in step 5 require the full `2 ** hard_addr_bits` depth
        let num_chunks_full = base_depth >> swizzle.hard_addr_bits;
        let depth_full = 1 << swizzle.hard_addr_bits;
        // compute the depth of the last, partial chunk created in step 5
        let depth_partial = base_depth & ((1 << swizzle.hard_addr_bits) - 1);
        // determine how much of the width of memory created in step 5 requires the full depth
        let width_full = base_width * num_chunks_full;
        // and determine its total width
        let width_total = if depth_partial != 0 { width_full + base_width } else { width_full };
        // now align up both of these numbers to `data_width_unit` chunks
        let num_mems_full = width_full.div_ceil(swizzle.data_width_unit);
        let num_mems_total = width_total.div_ceil(swizzle.data_width_unit);
        // and compute the final result
        let mut result = vec![depth_full; num_mems_full];
        while result.len() < num_mems_total {
            result.push(depth_partial);
        }
        result
    }

    fn swizzle_mux_bits(&self, read_ports: &[usize], swizzle: &MemorySwizzle) -> (Vec<usize>, Vec<usize>) {
        let soft_addr_bits = swizzle.soft_addr_bits_mask.count_ones() as usize;
        let chunks = self.depth.div_ceil(1 << (soft_addr_bits + swizzle.hard_addr_bits));
        let chunks_mux_bits = if chunks <= 1 { 0 } else { (chunks - 1).ilog2() as usize + 1 };
        let write_mux_bits = Vec::from_iter(self.write_ports.iter().zip(swizzle.write_wide_log2.iter()).map(
            |(port, &new_wide_log2)| {
                let old_wide_log2 = port.wide_log2(self);
                chunks_mux_bits + soft_addr_bits + new_wide_log2 - old_wide_log2
            },
        ));
        let read_mux_bits = Vec::from_iter(read_ports.iter().zip(swizzle.read_wide_log2.iter()).map(
            |(&port_index, &new_wide_log2)| {
                let port = &self.read_ports[port_index];
                let old_wide_log2 = port.wide_log2(self);
                chunks_mux_bits + soft_addr_bits + new_wide_log2 - old_wide_log2
            },
        ));
        (write_mux_bits, read_mux_bits)
    }

    fn swizzle(mut self, design: &Design, output: &Value, swizzle: &MemorySwizzle) -> Vec<(Memory, Value)> {
        // 0-width memories can have no outputs, and can just be unalived without remorse.
        if self.width == 0 {
            return vec![];
        }
        // 0-depth memories are annoying to deal with — get them out of the way.
        if self.depth == 0 {
            // while you cannot really read any row, the read ports can still have well-defined values
            // because of initial values and resets.  deal with them by converting them to async ones.
            let mut output = output.clone();
            for port_index in 0..self.read_ports.len() {
                self.unmap_read_dff(design, port_index, &mut output);
            }
            // now we have only asynchronous read ports, and can replace all their outputs with undef.
            design.replace_value(&output, Value::undef(output.len()));
            return vec![];
        }

        // first, replicate the calculations from `swizzle_depths`
        let soft_addr_bits = swizzle.soft_addr_bits_mask.count_ones() as usize;
        let base_width = swizzle.data_swizzle.len() << soft_addr_bits;
        let base_depth = self.depth >> soft_addr_bits;
        let depth_full = 1 << swizzle.hard_addr_bits;
        let depth_partial = base_depth & ((1 << swizzle.hard_addr_bits) - 1);
        let num_chunks_full = base_depth >> swizzle.hard_addr_bits;
        let num_chunks_total = if depth_partial == 0 { num_chunks_full } else { num_chunks_full + 1 };
        let width_full = base_width * num_chunks_full;
        let width_total = base_width * num_chunks_total;
        let num_mems_full = width_full.div_ceil(swizzle.data_width_unit);
        let num_mems_total = width_total.div_ceil(swizzle.data_width_unit);
        let width_total_aligned = num_mems_total * swizzle.data_width_unit;

        // performs steps 2-5 of the transformation on a port's address; the first value returned is
        // the new address for the port (a subset of the original address bits, potentially padded with 0 at the top),
        // while the second value is the remaining address bits (known as mux addr), which should be used to form
        // a mux / write address decoder.
        // the third value describes the swizzle for the data bits: it is indexed first by the mux_addr value, second by
        // the original port's subport index. the value is (new subport index, chunk index within the new port).
        fn swizzle_addr(
            addr: &Value,
            swizzle: &MemorySwizzle,
            num_chunks_total: usize,
            orig_wide_log2: usize,
            new_wide_log2: usize,
        ) -> (Value, Value, Vec<Vec<(usize, usize)>>) {
            #[derive(Clone, Copy)]
            enum BitDisposition {
                NewSubportBit(usize),
                ChunkIndexBit(usize),
            }

            let mut num_chunks_port = num_chunks_total;
            let mut hw_addr = Value::new();
            let mut mux_addr = Value::new();
            let mut old_subport_bit_disposition = vec![];
            let mut mux_addr_bit_disposition = vec![];
            let mut new_subport_bit = 0;
            let mut new_chunk_bit = 0;
            for addr_bit_index in 0..orig_wide_log2 {
                if ((swizzle.soft_addr_bits_mask >> addr_bit_index) & 1) != 0 {
                    old_subport_bit_disposition.push(BitDisposition::ChunkIndexBit(new_chunk_bit));
                    new_chunk_bit += 1;
                } else {
                    old_subport_bit_disposition.push(BitDisposition::NewSubportBit(new_subport_bit));
                    new_subport_bit += 1;
                }
            }
            assert!(new_subport_bit <= new_wide_log2);
            for (addr_bit_index, addr_bit) in (orig_wide_log2..).zip(addr) {
                if ((swizzle.soft_addr_bits_mask >> addr_bit_index) & 1) != 0 {
                    mux_addr_bit_disposition.push(BitDisposition::ChunkIndexBit(new_chunk_bit));
                    new_chunk_bit += 1;
                    mux_addr.push(addr_bit);
                    num_chunks_port *= 2;
                } else if new_subport_bit != new_wide_log2 {
                    mux_addr_bit_disposition.push(BitDisposition::NewSubportBit(new_subport_bit));
                    new_subport_bit += 1;
                    mux_addr.push(addr_bit);
                    num_chunks_port *= 2;
                } else if hw_addr.len() != swizzle.hard_addr_bits - new_wide_log2 {
                    hw_addr.push(addr_bit);
                } else {
                    mux_addr_bit_disposition.push(BitDisposition::ChunkIndexBit(new_chunk_bit));
                    new_chunk_bit += 1;
                    mux_addr.push(addr_bit);
                }
            }
            assert!(hw_addr.len() <= swizzle.hard_addr_bits - new_wide_log2);
            while hw_addr.len() < swizzle.hard_addr_bits - new_wide_log2 {
                hw_addr.push(Net::ZERO);
            }
            let useful_mux_bits = if num_chunks_port < 2 { 0 } else { (num_chunks_port - 1).ilog2() as usize + 1 };
            while mux_addr.len() < useful_mux_bits {
                mux_addr_bit_disposition.push(BitDisposition::ChunkIndexBit(new_chunk_bit));
                new_chunk_bit += 1;
                mux_addr.push(Net::ZERO);
            }

            let mut data_swizzle = Vec::from_iter(std::iter::repeat_n(vec![], num_chunks_port));
            for port_chunk in 0..num_chunks_port {
                for orig_subport in 0..(1 << orig_wide_log2) {
                    let mut new_subport = 0;
                    let mut new_chunk = 0;
                    for (bit, disposition) in old_subport_bit_disposition.iter().copied().enumerate() {
                        let bit_val = (orig_subport >> bit) & 1;
                        match disposition {
                            BitDisposition::NewSubportBit(new_bit) => new_subport |= bit_val << new_bit,
                            BitDisposition::ChunkIndexBit(new_bit) => new_chunk |= bit_val << new_bit,
                        }
                    }
                    for (bit, disposition) in mux_addr_bit_disposition.iter().copied().enumerate() {
                        if bit >= useful_mux_bits {
                            continue;
                        }
                        let bit_val = (port_chunk >> bit) & 1;
                        match disposition {
                            BitDisposition::NewSubportBit(new_bit) => new_subport |= bit_val << new_bit,
                            BitDisposition::ChunkIndexBit(new_bit) => new_chunk |= bit_val << new_bit,
                        }
                    }

                    data_swizzle[port_chunk].push((new_subport, new_chunk));
                }
            }

            (hw_addr, mux_addr, data_swizzle)
        }

        // perform steps 1-5 of the transformation on the write ports; this is done in a somewhat weird way:
        // each write port is split into `1 << wide_log2` chunks, which are later reassembled
        // when emitting the final memories.  this is done to make the slicing easier.
        let mut write_ports = vec![];
        for (port_index, orig_port) in self.write_ports.iter().enumerate() {
            let orig_wide_log2 = orig_port.wide_log2(&self);
            let new_wide_log2 = swizzle.write_wide_log2[port_index];
            let (new_addr, mux_addr, data_swizzle) =
                swizzle_addr(&orig_port.addr, swizzle, num_chunks_total, orig_wide_log2, new_wide_log2);
            let mut subports = vec![];
            for _ in 0..(1 << new_wide_log2) {
                subports.push(MemoryWritePort {
                    addr: new_addr.clone(),
                    data: Value::undef(width_total_aligned),
                    mask: Value::zero(width_total_aligned),
                    clock: orig_port.clock,
                });
            }
            for (mux_index, mux_index_data_swizzle) in data_swizzle.into_iter().enumerate() {
                let mux_match = design.add_eq(&mux_addr, Const::from_uint(mux_index as u128, mux_addr.len()));
                let mut new_data = Value::new();
                let mut new_mask = Value::new();
                // when creating the write mask, whenever a dummy bit needs to be inserted, make it a duplicate
                // of the previous bit instead of an X.  this ensures that any mask granularity constraints
                // are met (such constraints are generally the reason why dummy bits are inserted into the swizzle
                // anyway).
                for orig_subport_index in 0..(1 << orig_wide_log2) {
                    for &source in &swizzle.data_swizzle {
                        match source {
                            Some(index) => {
                                new_data.push(orig_port.data[orig_subport_index * self.width + index]);
                                new_mask.push(orig_port.mask[orig_subport_index * self.width + index]);
                            }
                            None => {
                                new_data.push(Net::UNDEF);
                                if new_mask.is_empty() {
                                    new_mask.push(Net::UNDEF);
                                } else {
                                    new_mask.push(new_mask.msb());
                                }
                            }
                        }
                    }
                }
                if !mux_addr.is_empty() {
                    new_mask = design.add_dedup_mux(
                        mux_match,
                        new_mask,
                        Value::zero(swizzle.data_swizzle.len() << orig_wide_log2),
                    );
                }
                for (orig_subport_index, (new_subport_index, new_chunk_index)) in
                    mux_index_data_swizzle.into_iter().enumerate()
                {
                    let new_chunk_slice = new_chunk_index * swizzle.data_swizzle.len()
                        ..(new_chunk_index + 1) * swizzle.data_swizzle.len();
                    let orig_subport_slice = orig_subport_index * swizzle.data_swizzle.len()
                        ..(orig_subport_index + 1) * swizzle.data_swizzle.len();
                    subports[new_subport_index].data[new_chunk_slice.clone()]
                        .copy_from_slice(&new_data[orig_subport_slice.clone()]);
                    subports[new_subport_index].mask[new_chunk_slice].copy_from_slice(&new_mask[orig_subport_slice]);
                }
            }
            write_ports.push(subports);
        }

        // likewise, perform steps 1-5 of the transformation on the read ports, also in a chunky manner.
        let mut read_ports = vec![];
        for (port_index, orig_port) in self.read_ports.iter().enumerate() {
            let orig_wide_log2 = orig_port.wide_log2(&self);
            let new_wide_log2 = swizzle.read_wide_log2[port_index];
            let (new_addr, mut mux_addr, data_swizzle) =
                swizzle_addr(&orig_port.addr, swizzle, num_chunks_total, orig_wide_log2, new_wide_log2);
            let mut subports = vec![];
            for _ in 0..(1 << new_wide_log2) {
                subports.push((
                    MemoryReadPort {
                        addr: new_addr.clone(),
                        data_len: width_total_aligned,
                        flip_flop: orig_port.flip_flop.as_ref().map(|flip_flop| MemoryReadFlipFlop {
                            clock: flip_flop.clock,
                            clear: flip_flop.clear,
                            reset: flip_flop.reset,
                            enable: flip_flop.enable,
                            reset_over_enable: flip_flop.reset_over_enable,
                            clear_value: Const::undef(width_total_aligned),
                            reset_value: Const::undef(width_total_aligned),
                            init_value: Const::undef(width_total_aligned),
                            relations: flip_flop.relations.clone(),
                        }),
                    },
                    design.add_void(width_total_aligned),
                ));
            }
            if let Some(ref flip_flop) = orig_port.flip_flop {
                mux_addr = design.add_dff(FlipFlop::new(mux_addr, flip_flop.clock).with_enable(flip_flop.enable));
            }
            let mut output_chunks = vec![];
            for mux_index_data_swizzle in data_swizzle {
                let mut output_chunk = Value::undef(orig_port.data_len);
                for (orig_subport_index, (new_subport_index, new_chunk_index)) in
                    mux_index_data_swizzle.into_iter().enumerate()
                {
                    let new_chunk_slice = new_chunk_index * swizzle.data_swizzle.len()
                        ..(new_chunk_index + 1) * swizzle.data_swizzle.len();
                    if let Some(ref flip_flop) = orig_port.flip_flop {
                        let swizzle_const = |value: &Const| {
                            Const::from_iter(swizzle.data_swizzle.iter().map(|&source| match source {
                                Some(index) => value[orig_subport_index * self.width + index],
                                None => Trit::Undef,
                            }))
                        };
                        let new_flip_flop = subports[new_subport_index].0.flip_flop.as_mut().unwrap();
                        new_flip_flop.clear_value[new_chunk_slice.clone()]
                            .copy_from_slice(&swizzle_const(&flip_flop.clear_value)[..]);
                        new_flip_flop.reset_value[new_chunk_slice.clone()]
                            .copy_from_slice(&swizzle_const(&flip_flop.reset_value)[..]);
                        new_flip_flop.init_value[new_chunk_slice.clone()]
                            .copy_from_slice(&swizzle_const(&flip_flop.init_value)[..]);
                    }
                    let swizzled_output_chunk = subports[new_subport_index].1.slice(new_chunk_slice);
                    for (&index, swizzled_output_bit) in swizzle.data_swizzle.iter().zip(swizzled_output_chunk) {
                        if let Some(index) = index {
                            output_chunk[orig_subport_index * self.width + index] = swizzled_output_bit;
                        }
                    }
                }
                output_chunks.push(output_chunk);
            }
            // create the mux tree bottom-up, halving the size of the output_chunks array each time
            for addr_bit in &mux_addr {
                output_chunks = Vec::from_iter(output_chunks.chunks(2).map(|pair| {
                    if pair.len() == 2 {
                        design.add_mux(addr_bit, &pair[1], &pair[0])
                    } else {
                        // we may be reading out-of-bounds. could also use a mux-with-undef here, but there's little point.
                        pair[0].clone()
                    }
                }))
            }
            let [port_output] = output_chunks.try_into().unwrap();
            design.replace_value(output.slice(self.read_port_output_slice(port_index)), port_output);
            read_ports.push(subports);
        }

        // prepare the memories, applying step 6 (not including initial values)
        let mut result = vec![];
        for index in 0..num_mems_total {
            let data_slice = index * swizzle.data_width_unit..(index + 1) * swizzle.data_width_unit;
            let depth = if index < num_mems_full { depth_full } else { depth_partial };
            let mut memory = Memory {
                depth,
                width: swizzle.data_width_unit,
                init_value: Const::undef(depth * swizzle.data_width_unit),
                write_ports: vec![],
                read_ports: vec![],
            };
            for write_subports in &write_ports {
                let mut port = MemoryWritePort {
                    addr: write_subports[0].addr.clone(),
                    data: Value::new(),
                    mask: Value::new(),
                    clock: write_subports[0].clock,
                };
                for subport in write_subports {
                    port.data.extend(subport.data.slice(data_slice.clone()));
                    port.mask.extend(subport.mask.slice(data_slice.clone()));
                }
                memory.write_ports.push(port);
            }
            let mut new_output = Value::new();
            for read_subports in &read_ports {
                let mut port = MemoryReadPort {
                    addr: read_subports[0].0.addr.clone(),
                    data_len: swizzle.data_width_unit * read_subports.len(),
                    flip_flop: read_subports[0].0.flip_flop.as_ref().map(|flip_flop| MemoryReadFlipFlop {
                        clock: flip_flop.clock,
                        clear: flip_flop.clear,
                        reset: flip_flop.reset,
                        enable: flip_flop.enable,
                        reset_over_enable: flip_flop.reset_over_enable,
                        clear_value: Const::new(),
                        reset_value: Const::new(),
                        init_value: Const::new(),
                        relations: flip_flop.relations.clone(),
                    }),
                };
                for (subport, subport_output) in read_subports {
                    if let Some(ref subport_flip_flop) = subport.flip_flop {
                        let port_flip_flop = port.flip_flop.as_mut().unwrap();
                        port_flip_flop.clear_value.extend(subport_flip_flop.clear_value.slice(data_slice.clone()));
                        port_flip_flop.reset_value.extend(subport_flip_flop.reset_value.slice(data_slice.clone()));
                        port_flip_flop.init_value.extend(subport_flip_flop.init_value.slice(data_slice.clone()));
                    }
                    new_output.extend(subport_output.slice(data_slice.clone()));
                }
                memory.read_ports.push(port);
            }
            result.push((memory, new_output));
        }

        // fill initial values.
        for row in 0..self.depth {
            let orig_value = self.init_value.slice(row * self.width..(row + 1) * self.width);
            // apply step 1
            let swizzled_value = Const::from_iter(swizzle.data_swizzle.iter().map(|&index| match index {
                None => Trit::Undef,
                Some(index) => orig_value[index],
            }));
            // apply steps 2 and 5 to the row index
            let mut new_row = 0;
            let mut new_row_bit = 0;
            let mut chunk_index = 0;
            let mut chunk_index_bit = 0;
            assert!(row < (1 << 32));
            for bit in 0..32 {
                let bit_value = (row >> bit) & 1;
                if ((swizzle.soft_addr_bits_mask >> bit) & 1) != 0 || new_row_bit == swizzle.hard_addr_bits {
                    chunk_index |= bit_value << chunk_index_bit;
                    chunk_index_bit += 1;
                } else {
                    new_row |= bit_value << new_row_bit;
                    new_row_bit += 1;
                }
            }
            assert_eq!(new_row_bit, swizzle.hard_addr_bits);
            // apply step 6
            for (index, bit) in swizzled_value.iter().enumerate() {
                let full_index = chunk_index * swizzle.data_swizzle.len() + index;
                let mem_index = full_index / swizzle.data_width_unit;
                let mem_bit_index = full_index % swizzle.data_width_unit;
                result[mem_index].0.init_value[new_row * swizzle.data_width_unit + mem_bit_index] = bit;
            }
        }

        result
    }

    fn can_lower_fallback(&self) -> bool {
        for port in &self.write_ports {
            if port.clock != self.write_ports[0].clock {
                return false;
            }
        }
        true
    }

    fn lower_fallback(mut self, design: &Design, output: &Value) {
        let mut write_clock = None;
        for write_port in &self.write_ports {
            if write_clock.is_none() {
                write_clock = Some(write_port.clock);
            } else if write_clock != Some(write_port.clock) {
                panic!("trying to lower memory with multiple write clocks");
            }
        }
        let mut output = output.clone();
        // convert all read ports to async first.
        for port_index in 0..self.read_ports.len() {
            self.unmap_read_dff(design, port_index, &mut output);
        }
        let max_wide_log2 = self
            .write_ports
            .iter()
            .map(|port| port.wide_log2(&self))
            .max()
            .unwrap_or(0)
            .max(self.read_ports.iter().map(|port| port.wide_log2(&self)).max().unwrap_or(0));
        // create a swizzle structure.  this will effectively split the memory into one sub-memory per row.
        let swizzle = MemorySwizzle {
            data_swizzle: Vec::from_iter((0..self.width).map(Some)),
            soft_addr_bits_mask: (1 << max_wide_log2) - 1,
            write_wide_log2: vec![0; self.write_ports.len()],
            read_wide_log2: vec![0; self.read_ports.len()],
            hard_addr_bits: 0,
            data_width_unit: self.width,
        };
        for (memory, mem_output) in self.swizzle(design, &output, &swizzle) {
            assert_eq!(memory.depth, 1);
            let data = if memory.write_ports.is_empty() {
                Value::from(memory.init_value)
            } else {
                let q = design.add_void(memory.width);
                let mut data = q.clone();
                for port in memory.write_ports {
                    assert_eq!(port.data.len(), data.len());
                    // multiplex our data onto the chain
                    data = design.add_bitwise_mux(port.mask, port.data, data);
                }
                design.replace_value(
                    &q,
                    design.add_dff(FlipFlop::new(data, write_clock.unwrap()).with_init(memory.init_value)),
                );
                q
            };
            // all read ports are asynchronous, and there is only a single row in the memory.
            // just wire it straight to all read data outputs.
            for (port_index, port) in memory.read_ports.into_iter().enumerate() {
                design
                    .replace_value(mem_output.slice(port_index * memory.width..(port_index + 1) * memory.width), &data);
                assert_eq!(port.data_len, data.len());
            }
        }
    }
}

// internal extension helpers for `Design`, used by the lowering code above.
trait DesignExt {
    // like `add_dff`, but deduplicates bits that share the same (data, reset, clear, init)
    // combination, then fans the deduplicated outputs back out to the original width.
    fn add_dedup_dff(&self, flip_flop: FlipFlop) -> Value;
    // like `add_mux`, but deduplicates bit positions where both inputs share the same net pair.
    fn add_dedup_mux(&self, arg1: impl Into<ControlNet>, arg2: impl Into<Value>, arg3: impl Into<Value>) -> Value;
    // a per-bit mux: each bit of `arg1` selects between the corresponding bits of `arg2` and
    // `arg3`; runs of identical select nets are emitted as a single wider mux.
    fn add_bitwise_mux(&self, arg1: impl Into<Value>, arg2: impl Into<Value>, arg3: impl Into<Value>) -> Value;
}

impl DesignExt for Design {
    fn add_dedup_dff(&self, mut flip_flop: FlipFlop) -> Value {
        let mut cache = BTreeMap::new();
        let mut result_swizzle = vec![];
        let mut dedup_data = Value::new();
        let mut dedup_reset_value = Const::new();
        let mut dedup_clear_value = Const::new();
        let mut dedup_init_value = Const::new();

        for (((data_bit, reset_bit), clear_bit), init_bit) in
            flip_flop.data.iter().zip(flip_flop.reset_value).zip(flip_flop.clear_value).zip(flip_flop.init_value)
        {
            match cache.entry((data_bit, reset_bit, clear_bit, init_bit)) {
                btree_map::Entry::Vacant(entry) => {
                    let index = dedup_data.len();
                    dedup_data.push(data_bit);
                    dedup_reset_value.push(reset_bit);
                    dedup_clear_value.push(clear_bit);
                    dedup_init_value.push(init_bit);
                    result_swizzle.push(index);
                    entry.insert(index);
                }
                btree_map::Entry::Occupied(entry) => {
                    result_swizzle.push(*entry.get());
                }
            }
        }
        flip_flop.data = dedup_data;
        flip_flop.reset_value = dedup_reset_value;
        flip_flop.clear_value = dedup_clear_value;
        flip_flop.init_value = dedup_init_value;
        let dedup_result = self.add_dff(flip_flop);
        Value::from_iter(result_swizzle.iter().map(|&index| dedup_result[index]))
    }

    fn add_dedup_mux(&self, arg1: impl Into<ControlNet>, arg2: impl Into<Value>, arg3: impl Into<Value>) -> Value {
        let arg1 = arg1.into();
        let arg2 = arg2.into();
        let arg3 = arg3.into();
        let mut cache = BTreeMap::new();
        let mut result_swizzle = vec![];
        let mut dedup_arg2 = Value::new();
        let mut dedup_arg3 = Value::new();
        for (arg2_bit, arg3_bit) in arg2.iter().zip(arg3) {
            match cache.entry((arg2_bit, arg3_bit)) {
                btree_map::Entry::Vacant(entry) => {
                    let index = dedup_arg2.len();
                    dedup_arg2.push(arg2_bit);
                    dedup_arg3.push(arg3_bit);
                    result_swizzle.push(index);
                    entry.insert(index);
                }
                btree_map::Entry::Occupied(entry) => {
                    result_swizzle.push(*entry.get());
                }
            }
        }
        let dedup_result = self.add_mux(arg1, dedup_arg2, dedup_arg3);
        Value::from_iter(result_swizzle.iter().map(|&index| dedup_result[index]))
    }

    fn add_bitwise_mux(&self, arg1: impl Into<Value>, arg2: impl Into<Value>, arg3: impl Into<Value>) -> Value {
        let arg1 = arg1.into();
        let arg2 = arg2.into();
        let arg3 = arg3.into();
        let mut result = Value::new();
        let mut index = 0;
        assert_eq!(arg1.len(), arg2.len());
        assert_eq!(arg1.len(), arg3.len());
        while index < arg1.len() {
            let mut end_index = index;
            while end_index < arg1.len() && arg1[end_index] == arg1[index] {
                end_index += 1;
            }
            let arg2_chunk = arg2.slice(index..end_index);
            let arg3_chunk = arg3.slice(index..end_index);
            result.extend(self.add_dedup_mux(arg1[index], arg2_chunk, arg3_chunk));
            index = end_index;
        }
        result
    }
}