1use std::collections::{btree_map, BTreeMap};
48
49use prjunnamed_netlist::{
50 Const, ControlNet, Design, FlipFlop, Memory, MemoryPortRelation, MemoryReadFlipFlop, MemoryReadPort,
51 MemoryWritePort, Net, Trit, Value,
52};
53
54#[derive(Debug, Clone, PartialEq, Eq)]
100pub struct MemorySwizzle {
101 pub data_swizzle: Vec<Option<usize>>,
106 pub soft_addr_bits_mask: u32,
107 pub write_wide_log2: Vec<usize>,
108 pub read_wide_log2: Vec<usize>,
109 pub hard_addr_bits: usize,
110 pub data_width_unit: usize,
111}
112
113pub trait MemoryExt {
115 fn extract_read_ports(&self, ports: &[usize], output: &Value) -> (Memory, Value);
121
122 fn unmap_read_dff(&mut self, design: &Design, port_index: usize, output: &mut Value);
132
133 fn unmap_read_init_reset_transparency(
148 &mut self,
149 design: &Design,
150 port_index: usize,
151 include_transparency: bool,
152 output: &mut Value,
153 );
154
155 fn emulate_read_before_write(&mut self, design: &Design);
170
171 fn make_data_swizzle(&self, write_port_granularity: &[usize]) -> Vec<Option<usize>>;
180
181 fn swizzle_depths(&self, swizzle: &MemorySwizzle) -> Vec<usize>;
190
191 fn swizzle_mux_bits(&self, read_ports: &[usize], swizzle: &MemorySwizzle) -> (Vec<usize>, Vec<usize>);
199
200 fn swizzle(self, design: &Design, output: &Value, swizzle: &MemorySwizzle) -> Vec<(Memory, Value)>;
203
204 fn can_lower_fallback(&self) -> bool;
208
209 fn lower_fallback(self, design: &Design, output: &Value);
219}
220
221fn transparency_mux(
228 memory: &Memory,
229 design: &Design,
230 read_port_index: usize,
231 write_port_index: usize,
232 data: Value,
233 mask: Option<Value>,
234) -> (Value, Option<Value>) {
235 let read_port = &memory.read_ports[read_port_index];
236 let write_port = &memory.write_ports[write_port_index];
237
238 let write_wide_log2 = write_port.wide_log2(memory);
240 let read_wide_log2 = read_port.wide_log2(memory);
241 let (write_addr, read_addr, write_data, write_mask) = match write_wide_log2.cmp(&read_wide_log2) {
242 std::cmp::Ordering::Less => {
243 let wide_log2_diff = read_wide_log2 - write_wide_log2;
245 let select = write_port.addr.slice(..wide_log2_diff);
246 let write_addr = write_port.addr.slice(wide_log2_diff..);
247 let write_data =
248 design.add_shl(write_port.data.zext(read_port.data_len), &select, write_port.data.len() as u32);
249 let write_mask =
250 design.add_shl(write_port.mask.zext(read_port.data_len), &select, write_port.mask.len() as u32);
251 (write_addr, read_port.addr.clone(), write_data, write_mask)
252 }
253 std::cmp::Ordering::Equal => {
254 (write_port.addr.clone(), read_port.addr.clone(), write_port.data.clone(), write_port.mask.clone())
255 }
256 std::cmp::Ordering::Greater => {
257 let wide_log2_diff = write_wide_log2 - read_wide_log2;
259 let select = read_port.addr.slice(..wide_log2_diff);
260 let read_addr = read_port.addr.slice(wide_log2_diff..);
261 let write_data =
262 design.add_ushr(&write_port.data, &select, read_port.data_len as u32).slice(..read_port.data_len);
263 let write_mask =
264 design.add_ushr(&write_port.mask, &select, read_port.data_len as u32).slice(..read_port.data_len);
265 (write_port.addr.clone(), read_addr, write_data, write_mask)
266 }
267 };
268
269 let max_addr_width = std::cmp::max(read_addr.len(), write_addr.len());
271 let addr_eq = design.add_eq(read_addr.zext(max_addr_width), write_addr.zext(max_addr_width));
272
273 let sel_write = design.add_dedup_mux(addr_eq, &write_mask, Value::zero(write_mask.len()));
275 let new_data = design.add_bitwise_mux(sel_write, write_data, data);
276 let new_mask = mask.map(|mask| design.add_dedup_mux(addr_eq, design.add_or(&mask, write_mask), mask));
277
278 (new_data, new_mask)
279}
280
281impl MemoryExt for Memory {
282 fn extract_read_ports(&self, ports: &[usize], output: &Value) -> (Memory, Value) {
283 let mut new_memory = Memory {
284 depth: self.depth,
285 width: self.width,
286 init_value: self.init_value.clone(),
287 write_ports: self.write_ports.clone(),
288 read_ports: vec![],
289 };
290 let mut new_output = Value::new();
291 for &port_index in ports {
292 new_memory.read_ports.push(self.read_ports[port_index].clone());
293 let output_slice = self.read_port_output_slice(port_index);
294 new_output.extend(output.slice(output_slice));
295 }
296 (new_memory, new_output)
297 }
298
299 fn unmap_read_dff(&mut self, design: &Design, port_index: usize, output: &mut Value) {
300 let read_port = &mut self.read_ports[port_index];
301 let Some(port_flip_flop) = read_port.flip_flop.take() else {
302 return;
303 };
304 let new_port_output = design.add_void(read_port.data_len);
305 let mut data = new_port_output.clone();
306 for (write_port_index, relation) in port_flip_flop.relations.into_iter().enumerate() {
307 if relation == MemoryPortRelation::Transparent {
308 (data, _) = transparency_mux(self, design, port_index, write_port_index, data, None);
309 }
310 }
311 let q = design.add_dff(FlipFlop {
312 data,
313 clock: port_flip_flop.clock,
314 clear: port_flip_flop.clear,
315 load: ControlNet::Pos(Net::ZERO),
316 load_data: Value::undef(new_port_output.len()),
317 reset: port_flip_flop.reset,
318 enable: port_flip_flop.enable,
319 reset_over_enable: port_flip_flop.reset_over_enable,
320 clear_value: port_flip_flop.clear_value,
321 reset_value: port_flip_flop.reset_value,
322 init_value: port_flip_flop.init_value,
323 });
324 let output_slice = self.read_port_output_slice(port_index);
325 design.replace_value(output.slice(output_slice.clone()), q);
326 output[output_slice.clone()].copy_from_slice(&new_port_output[..]);
327 }
328
329 fn unmap_read_init_reset_transparency(
330 &mut self,
331 design: &Design,
332 port_index: usize,
333 include_transparency: bool,
334 output: &mut Value,
335 ) {
336 let read_port = &self.read_ports[port_index];
337 let port_flip_flop = read_port.flip_flop.as_ref().unwrap();
338
339 let has_init = port_flip_flop.has_init_value();
340 let has_reset = port_flip_flop.has_reset();
341 let has_clear = port_flip_flop.has_clear();
342
343 let mut data = Value::undef(read_port.data_len);
347 let mut mask = Some(Value::zero(read_port.data_len));
348 let mut has_transparency = false;
349 if include_transparency {
350 for (write_port_index, &relation) in port_flip_flop.relations.iter().enumerate() {
351 if relation == MemoryPortRelation::Transparent {
352 (data, mask) = transparency_mux(self, design, port_index, write_port_index, data, mask);
353 has_transparency = true;
354 }
355 }
356 }
357 let mask = mask.unwrap();
358
359 if !has_init && !has_reset && !has_clear && !has_transparency {
360 return;
361 }
362
363 let read_port = &mut self.read_ports[port_index];
365 let port_flip_flop = read_port.flip_flop.as_mut().unwrap();
366 let data = design.add_dff(FlipFlop {
367 data,
368 clock: port_flip_flop.clock,
369 clear: port_flip_flop.clear,
370 load: ControlNet::Pos(Net::ZERO),
371 load_data: Value::undef(read_port.data_len),
372 reset: port_flip_flop.reset,
373 enable: port_flip_flop.enable,
374 reset_over_enable: port_flip_flop.reset_over_enable,
375 clear_value: std::mem::replace(&mut port_flip_flop.clear_value, Const::undef(read_port.data_len)),
376 reset_value: std::mem::replace(&mut port_flip_flop.reset_value, Const::undef(read_port.data_len)),
377 init_value: std::mem::replace(&mut port_flip_flop.init_value, Const::undef(read_port.data_len)),
378 });
379 let mask = design.add_dedup_dff(FlipFlop {
380 data: mask,
381 clock: port_flip_flop.clock,
382 clear: port_flip_flop.clear,
383 load: ControlNet::Pos(Net::ZERO),
384 load_data: Value::undef(read_port.data_len),
385 reset: port_flip_flop.reset,
386 enable: port_flip_flop.enable,
387 reset_over_enable: port_flip_flop.reset_over_enable,
388 clear_value: Const::ones(read_port.data_len),
389 reset_value: Const::ones(read_port.data_len),
390 init_value: if has_init { Const::ones(read_port.data_len) } else { Const::undef(read_port.data_len) },
391 });
392 if include_transparency {
393 for relation in &mut port_flip_flop.relations {
394 if *relation == MemoryPortRelation::Transparent {
395 *relation = MemoryPortRelation::Undefined;
396 }
397 }
398 }
399 port_flip_flop.clear = ControlNet::ZERO;
400 port_flip_flop.reset = ControlNet::ZERO;
401
402 let new_port_output = design.add_void(read_port.data_len);
404 let mux = design.add_bitwise_mux(mask, data, &new_port_output);
405
406 let output_slice = self.read_port_output_slice(port_index);
408 design.replace_value(output.slice(output_slice.clone()), mux);
409 output[output_slice.clone()].copy_from_slice(&new_port_output[..]);
410 }
411
412 fn emulate_read_before_write(&mut self, design: &Design) {
413 let mut rdfirst_write_ports = vec![false; self.write_ports.len()];
414 let mut transparent_write_ports = vec![false; self.write_ports.len()];
415
416 for port in &self.read_ports {
417 let port_flip_flop = port.flip_flop.as_ref().unwrap();
418 assert_eq!(
419 port_flip_flop.clock,
420 self.read_ports[0].flip_flop.as_ref().unwrap().clock,
421 "all read ports must have the same clock"
422 );
423 for (write_port_index, &relation) in port_flip_flop.relations.iter().enumerate() {
424 match relation {
425 MemoryPortRelation::Undefined => (),
426 MemoryPortRelation::ReadBeforeWrite => rdfirst_write_ports[write_port_index] = true,
427 MemoryPortRelation::Transparent => transparent_write_ports[write_port_index] = true,
428 }
429 }
430 }
431
432 let init_undef = self.init_value.is_undef();
433
434 for (port_index, port) in self.write_ports.iter_mut().enumerate() {
435 if !rdfirst_write_ports[port_index] {
436 continue;
437 }
438 assert!(
439 !transparent_write_ports[port_index],
440 "a single write port cannot have both a read-before-write and a transparent relation to two read ports"
441 );
442 port.addr = design.add_dff(FlipFlop::new(std::mem::take(&mut port.addr), port.clock));
443 port.data = design.add_dff(FlipFlop::new(std::mem::take(&mut port.data), port.clock));
444 port.mask = design.add_dedup_dff(
445 FlipFlop::new(std::mem::take(&mut port.mask), port.clock).with_init(if init_undef {
446 Const::undef(port.data.len())
447 } else {
448 Const::zero(port.data.len())
449 }),
450 );
451 port.clock = !port.clock;
452 for read_port in &mut self.read_ports {
453 let read_ff = read_port.flip_flop.as_mut().unwrap();
454 read_ff.relations[port_index] = MemoryPortRelation::Undefined;
455 }
456 }
457 }
458
459 fn make_data_swizzle(&self, write_port_granularity: &[usize]) -> Vec<Option<usize>> {
460 assert_eq!(self.write_ports.len(), write_port_granularity.len());
461 let mask_slices = Vec::from_iter((0..self.width).map(|bit_index| {
462 Vec::from_iter(self.write_ports.iter().map(|port| {
463 Value::from_iter(
464 (0..(port.data.len() / self.width))
465 .map(|subport_index| port.mask[subport_index * self.width + bit_index]),
466 )
467 }))
468 }));
469 let mut result = vec![];
470 for bit_index in 0..self.width {
471 'check_slices: loop {
472 for (port_index, &port_granularity) in write_port_granularity.iter().enumerate() {
473 if !result.len().is_multiple_of(port_granularity)
474 && mask_slices[bit_index][port_index] != mask_slices[bit_index - 1][port_index]
475 {
476 result.push(None);
477 continue 'check_slices;
478 }
479 }
480 break;
481 }
482 result.push(Some(bit_index));
483 }
484 'align: loop {
485 for &port_granularity in write_port_granularity {
486 if !result.len().is_multiple_of(port_granularity) {
487 result.push(None);
488 continue 'align;
489 }
490 }
491 break;
492 }
493 result
494 }
495
496 fn swizzle_depths(&self, swizzle: &MemorySwizzle) -> Vec<usize> {
497 let soft_addr_bits = swizzle.soft_addr_bits_mask.count_ones() as usize;
499 let base_width = swizzle.data_swizzle.len() << soft_addr_bits;
501 let base_depth = self.depth >> soft_addr_bits;
502 let num_chunks_full = base_depth >> swizzle.hard_addr_bits;
504 let depth_full = 1 << swizzle.hard_addr_bits;
505 let depth_partial = base_depth & ((1 << swizzle.hard_addr_bits) - 1);
507 let width_full = base_width * num_chunks_full;
509 let width_total = if depth_partial != 0 { width_full + base_width } else { width_full };
511 let num_mems_full = width_full.div_ceil(swizzle.data_width_unit);
513 let num_mems_total = width_total.div_ceil(swizzle.data_width_unit);
514 let mut result = vec![depth_full; num_mems_full];
516 while result.len() < num_mems_total {
517 result.push(depth_partial);
518 }
519 result
520 }
521
522 fn swizzle_mux_bits(&self, read_ports: &[usize], swizzle: &MemorySwizzle) -> (Vec<usize>, Vec<usize>) {
523 let soft_addr_bits = swizzle.soft_addr_bits_mask.count_ones() as usize;
524 let chunks = self.depth.div_ceil(1 << (soft_addr_bits + swizzle.hard_addr_bits));
525 let chunks_mux_bits = if chunks <= 1 { 0 } else { (chunks - 1).ilog2() as usize + 1 };
526 let write_mux_bits = Vec::from_iter(self.write_ports.iter().zip(swizzle.write_wide_log2.iter()).map(
527 |(port, &new_wide_log2)| {
528 let old_wide_log2 = port.wide_log2(self);
529 chunks_mux_bits + soft_addr_bits + new_wide_log2 - old_wide_log2
530 },
531 ));
532 let read_mux_bits = Vec::from_iter(read_ports.iter().zip(swizzle.read_wide_log2.iter()).map(
533 |(&port_index, &new_wide_log2)| {
534 let port = &self.read_ports[port_index];
535 let old_wide_log2 = port.wide_log2(self);
536 chunks_mux_bits + soft_addr_bits + new_wide_log2 - old_wide_log2
537 },
538 ));
539 (write_mux_bits, read_mux_bits)
540 }
541
542 fn swizzle(mut self, design: &Design, output: &Value, swizzle: &MemorySwizzle) -> Vec<(Memory, Value)> {
543 if self.width == 0 {
545 return vec![];
546 }
547 if self.depth == 0 {
549 let mut output = output.clone();
552 for port_index in 0..self.read_ports.len() {
553 self.unmap_read_dff(design, port_index, &mut output);
554 }
555 design.replace_value(&output, Value::undef(output.len()));
557 return vec![];
558 }
559
560 let soft_addr_bits = swizzle.soft_addr_bits_mask.count_ones() as usize;
562 let base_width = swizzle.data_swizzle.len() << soft_addr_bits;
563 let base_depth = self.depth >> soft_addr_bits;
564 let depth_full = 1 << swizzle.hard_addr_bits;
565 let depth_partial = base_depth & ((1 << swizzle.hard_addr_bits) - 1);
566 let num_chunks_full = base_depth >> swizzle.hard_addr_bits;
567 let num_chunks_total = if depth_partial == 0 { num_chunks_full } else { num_chunks_full + 1 };
568 let width_full = base_width * num_chunks_full;
569 let width_total = base_width * num_chunks_total;
570 let num_mems_full = width_full.div_ceil(swizzle.data_width_unit);
571 let num_mems_total = width_total.div_ceil(swizzle.data_width_unit);
572 let width_total_aligned = num_mems_total * swizzle.data_width_unit;
573
574 fn swizzle_addr(
581 addr: &Value,
582 swizzle: &MemorySwizzle,
583 num_chunks_total: usize,
584 orig_wide_log2: usize,
585 new_wide_log2: usize,
586 ) -> (Value, Value, Vec<Vec<(usize, usize)>>) {
587 #[derive(Clone, Copy)]
588 enum BitDisposition {
589 NewSubportBit(usize),
590 ChunkIndexBit(usize),
591 }
592
593 let mut num_chunks_port = num_chunks_total;
594 let mut hw_addr = Value::new();
595 let mut mux_addr = Value::new();
596 let mut old_subport_bit_disposition = vec![];
597 let mut mux_addr_bit_disposition = vec![];
598 let mut new_subport_bit = 0;
599 let mut new_chunk_bit = 0;
600 for addr_bit_index in 0..orig_wide_log2 {
601 if ((swizzle.soft_addr_bits_mask >> addr_bit_index) & 1) != 0 {
602 old_subport_bit_disposition.push(BitDisposition::ChunkIndexBit(new_chunk_bit));
603 new_chunk_bit += 1;
604 } else {
605 old_subport_bit_disposition.push(BitDisposition::NewSubportBit(new_subport_bit));
606 new_subport_bit += 1;
607 }
608 }
609 assert!(new_subport_bit <= new_wide_log2);
610 for (addr_bit_index, addr_bit) in (orig_wide_log2..).zip(addr) {
611 if ((swizzle.soft_addr_bits_mask >> addr_bit_index) & 1) != 0 {
612 mux_addr_bit_disposition.push(BitDisposition::ChunkIndexBit(new_chunk_bit));
613 new_chunk_bit += 1;
614 mux_addr.push(addr_bit);
615 num_chunks_port *= 2;
616 } else if new_subport_bit != new_wide_log2 {
617 mux_addr_bit_disposition.push(BitDisposition::NewSubportBit(new_subport_bit));
618 new_subport_bit += 1;
619 mux_addr.push(addr_bit);
620 num_chunks_port *= 2;
621 } else if hw_addr.len() != swizzle.hard_addr_bits - new_wide_log2 {
622 hw_addr.push(addr_bit);
623 } else {
624 mux_addr_bit_disposition.push(BitDisposition::ChunkIndexBit(new_chunk_bit));
625 new_chunk_bit += 1;
626 mux_addr.push(addr_bit);
627 }
628 }
629 assert!(hw_addr.len() <= swizzle.hard_addr_bits - new_wide_log2);
630 while hw_addr.len() < swizzle.hard_addr_bits - new_wide_log2 {
631 hw_addr.push(Net::ZERO);
632 }
633 let useful_mux_bits = if num_chunks_port < 2 { 0 } else { (num_chunks_port - 1).ilog2() as usize + 1 };
634 while mux_addr.len() < useful_mux_bits {
635 mux_addr_bit_disposition.push(BitDisposition::ChunkIndexBit(new_chunk_bit));
636 new_chunk_bit += 1;
637 mux_addr.push(Net::ZERO);
638 }
639
640 let mut data_swizzle = Vec::from_iter(std::iter::repeat_n(vec![], num_chunks_port));
641 for port_chunk in 0..num_chunks_port {
642 for orig_subport in 0..(1 << orig_wide_log2) {
643 let mut new_subport = 0;
644 let mut new_chunk = 0;
645 for (bit, disposition) in old_subport_bit_disposition.iter().copied().enumerate() {
646 let bit_val = (orig_subport >> bit) & 1;
647 match disposition {
648 BitDisposition::NewSubportBit(new_bit) => new_subport |= bit_val << new_bit,
649 BitDisposition::ChunkIndexBit(new_bit) => new_chunk |= bit_val << new_bit,
650 }
651 }
652 for (bit, disposition) in mux_addr_bit_disposition.iter().copied().enumerate() {
653 if bit >= useful_mux_bits {
654 continue;
655 }
656 let bit_val = (port_chunk >> bit) & 1;
657 match disposition {
658 BitDisposition::NewSubportBit(new_bit) => new_subport |= bit_val << new_bit,
659 BitDisposition::ChunkIndexBit(new_bit) => new_chunk |= bit_val << new_bit,
660 }
661 }
662
663 data_swizzle[port_chunk].push((new_subport, new_chunk));
664 }
665 }
666
667 (hw_addr, mux_addr, data_swizzle)
668 }
669
670 let mut write_ports = vec![];
674 for (port_index, orig_port) in self.write_ports.iter().enumerate() {
675 let orig_wide_log2 = orig_port.wide_log2(&self);
676 let new_wide_log2 = swizzle.write_wide_log2[port_index];
677 let (new_addr, mux_addr, data_swizzle) =
678 swizzle_addr(&orig_port.addr, swizzle, num_chunks_total, orig_wide_log2, new_wide_log2);
679 let mut subports = vec![];
680 for _ in 0..(1 << new_wide_log2) {
681 subports.push(MemoryWritePort {
682 addr: new_addr.clone(),
683 data: Value::undef(width_total_aligned),
684 mask: Value::zero(width_total_aligned),
685 clock: orig_port.clock,
686 });
687 }
688 for (mux_index, mux_index_data_swizzle) in data_swizzle.into_iter().enumerate() {
689 let mux_match = design.add_eq(&mux_addr, Const::from_uint(mux_index as u128, mux_addr.len()));
690 let mut new_data = Value::new();
691 let mut new_mask = Value::new();
692 for orig_subport_index in 0..(1 << orig_wide_log2) {
697 for &source in &swizzle.data_swizzle {
698 match source {
699 Some(index) => {
700 new_data.push(orig_port.data[orig_subport_index * self.width + index]);
701 new_mask.push(orig_port.mask[orig_subport_index * self.width + index]);
702 }
703 None => {
704 new_data.push(Net::UNDEF);
705 if new_mask.is_empty() {
706 new_mask.push(Net::UNDEF);
707 } else {
708 new_mask.push(new_mask.msb());
709 }
710 }
711 }
712 }
713 }
714 if !mux_addr.is_empty() {
715 new_mask = design.add_dedup_mux(
716 mux_match,
717 new_mask,
718 Value::zero(swizzle.data_swizzle.len() << orig_wide_log2),
719 );
720 }
721 for (orig_subport_index, (new_subport_index, new_chunk_index)) in
722 mux_index_data_swizzle.into_iter().enumerate()
723 {
724 let new_chunk_slice = new_chunk_index * swizzle.data_swizzle.len()
725 ..(new_chunk_index + 1) * swizzle.data_swizzle.len();
726 let orig_subport_slice = orig_subport_index * swizzle.data_swizzle.len()
727 ..(orig_subport_index + 1) * swizzle.data_swizzle.len();
728 subports[new_subport_index].data[new_chunk_slice.clone()]
729 .copy_from_slice(&new_data[orig_subport_slice.clone()]);
730 subports[new_subport_index].mask[new_chunk_slice].copy_from_slice(&new_mask[orig_subport_slice]);
731 }
732 }
733 write_ports.push(subports);
734 }
735
736 let mut read_ports = vec![];
738 for (port_index, orig_port) in self.read_ports.iter().enumerate() {
739 let orig_wide_log2 = orig_port.wide_log2(&self);
740 let new_wide_log2 = swizzle.read_wide_log2[port_index];
741 let (new_addr, mut mux_addr, data_swizzle) =
742 swizzle_addr(&orig_port.addr, swizzle, num_chunks_total, orig_wide_log2, new_wide_log2);
743 let mut subports = vec![];
744 for _ in 0..(1 << new_wide_log2) {
745 subports.push((
746 MemoryReadPort {
747 addr: new_addr.clone(),
748 data_len: width_total_aligned,
749 flip_flop: orig_port.flip_flop.as_ref().map(|flip_flop| MemoryReadFlipFlop {
750 clock: flip_flop.clock,
751 clear: flip_flop.clear,
752 reset: flip_flop.reset,
753 enable: flip_flop.enable,
754 reset_over_enable: flip_flop.reset_over_enable,
755 clear_value: Const::undef(width_total_aligned),
756 reset_value: Const::undef(width_total_aligned),
757 init_value: Const::undef(width_total_aligned),
758 relations: flip_flop.relations.clone(),
759 }),
760 },
761 design.add_void(width_total_aligned),
762 ));
763 }
764 if let Some(ref flip_flop) = orig_port.flip_flop {
765 mux_addr = design.add_dff(FlipFlop::new(mux_addr, flip_flop.clock).with_enable(flip_flop.enable));
766 }
767 let mut output_chunks = vec![];
768 for mux_index_data_swizzle in data_swizzle {
769 let mut output_chunk = Value::undef(orig_port.data_len);
770 for (orig_subport_index, (new_subport_index, new_chunk_index)) in
771 mux_index_data_swizzle.into_iter().enumerate()
772 {
773 let new_chunk_slice = new_chunk_index * swizzle.data_swizzle.len()
774 ..(new_chunk_index + 1) * swizzle.data_swizzle.len();
775 if let Some(ref flip_flop) = orig_port.flip_flop {
776 let swizzle_const = |value: &Const| {
777 Const::from_iter(swizzle.data_swizzle.iter().map(|&source| match source {
778 Some(index) => value[orig_subport_index * self.width + index],
779 None => Trit::Undef,
780 }))
781 };
782 let new_flip_flop = subports[new_subport_index].0.flip_flop.as_mut().unwrap();
783 new_flip_flop.clear_value[new_chunk_slice.clone()]
784 .copy_from_slice(&swizzle_const(&flip_flop.clear_value)[..]);
785 new_flip_flop.reset_value[new_chunk_slice.clone()]
786 .copy_from_slice(&swizzle_const(&flip_flop.reset_value)[..]);
787 new_flip_flop.init_value[new_chunk_slice.clone()]
788 .copy_from_slice(&swizzle_const(&flip_flop.init_value)[..]);
789 }
790 let swizzled_output_chunk = subports[new_subport_index].1.slice(new_chunk_slice);
791 for (&index, swizzled_output_bit) in swizzle.data_swizzle.iter().zip(swizzled_output_chunk) {
792 if let Some(index) = index {
793 output_chunk[orig_subport_index * self.width + index] = swizzled_output_bit;
794 }
795 }
796 }
797 output_chunks.push(output_chunk);
798 }
799 for addr_bit in &mux_addr {
801 output_chunks = Vec::from_iter(output_chunks.chunks(2).map(|pair| {
802 if pair.len() == 2 {
803 design.add_mux(addr_bit, &pair[1], &pair[0])
804 } else {
805 pair[0].clone()
807 }
808 }))
809 }
810 let [port_output] = output_chunks.try_into().unwrap();
811 design.replace_value(output.slice(self.read_port_output_slice(port_index)), port_output);
812 read_ports.push(subports);
813 }
814
815 let mut result = vec![];
817 for index in 0..num_mems_total {
818 let data_slice = index * swizzle.data_width_unit..(index + 1) * swizzle.data_width_unit;
819 let depth = if index < num_mems_full { depth_full } else { depth_partial };
820 let mut memory = Memory {
821 depth,
822 width: swizzle.data_width_unit,
823 init_value: Const::undef(depth * swizzle.data_width_unit),
824 write_ports: vec![],
825 read_ports: vec![],
826 };
827 for write_subports in &write_ports {
828 let mut port = MemoryWritePort {
829 addr: write_subports[0].addr.clone(),
830 data: Value::new(),
831 mask: Value::new(),
832 clock: write_subports[0].clock,
833 };
834 for subport in write_subports {
835 port.data.extend(subport.data.slice(data_slice.clone()));
836 port.mask.extend(subport.mask.slice(data_slice.clone()));
837 }
838 memory.write_ports.push(port);
839 }
840 let mut new_output = Value::new();
841 for read_subports in &read_ports {
842 let mut port = MemoryReadPort {
843 addr: read_subports[0].0.addr.clone(),
844 data_len: swizzle.data_width_unit * read_subports.len(),
845 flip_flop: read_subports[0].0.flip_flop.as_ref().map(|flip_flop| MemoryReadFlipFlop {
846 clock: flip_flop.clock,
847 clear: flip_flop.clear,
848 reset: flip_flop.reset,
849 enable: flip_flop.enable,
850 reset_over_enable: flip_flop.reset_over_enable,
851 clear_value: Const::new(),
852 reset_value: Const::new(),
853 init_value: Const::new(),
854 relations: flip_flop.relations.clone(),
855 }),
856 };
857 for (subport, subport_output) in read_subports {
858 if let Some(ref subport_flip_flop) = subport.flip_flop {
859 let port_flip_flop = port.flip_flop.as_mut().unwrap();
860 port_flip_flop.clear_value.extend(subport_flip_flop.clear_value.slice(data_slice.clone()));
861 port_flip_flop.reset_value.extend(subport_flip_flop.reset_value.slice(data_slice.clone()));
862 port_flip_flop.init_value.extend(subport_flip_flop.init_value.slice(data_slice.clone()));
863 }
864 new_output.extend(subport_output.slice(data_slice.clone()));
865 }
866 memory.read_ports.push(port);
867 }
868 result.push((memory, new_output));
869 }
870
871 for row in 0..self.depth {
873 let orig_value = self.init_value.slice(row * self.width..(row + 1) * self.width);
874 let swizzled_value = Const::from_iter(swizzle.data_swizzle.iter().map(|&index| match index {
876 None => Trit::Undef,
877 Some(index) => orig_value[index],
878 }));
879 let mut new_row = 0;
881 let mut new_row_bit = 0;
882 let mut chunk_index = 0;
883 let mut chunk_index_bit = 0;
884 assert!(row < (1 << 32));
885 for bit in 0..32 {
886 let bit_value = (row >> bit) & 1;
887 if ((swizzle.soft_addr_bits_mask >> bit) & 1) != 0 || new_row_bit == swizzle.hard_addr_bits {
888 chunk_index |= bit_value << chunk_index_bit;
889 chunk_index_bit += 1;
890 } else {
891 new_row |= bit_value << new_row_bit;
892 new_row_bit += 1;
893 }
894 }
895 assert_eq!(new_row_bit, swizzle.hard_addr_bits);
896 for (index, bit) in swizzled_value.iter().enumerate() {
898 let full_index = chunk_index * swizzle.data_swizzle.len() + index;
899 let mem_index = full_index / swizzle.data_width_unit;
900 let mem_bit_index = full_index % swizzle.data_width_unit;
901 result[mem_index].0.init_value[new_row * swizzle.data_width_unit + mem_bit_index] = bit;
902 }
903 }
904
905 result
906 }
907
908 fn can_lower_fallback(&self) -> bool {
909 for port in &self.write_ports {
910 if port.clock != self.write_ports[0].clock {
911 return false;
912 }
913 }
914 true
915 }
916
917 fn lower_fallback(mut self, design: &Design, output: &Value) {
918 let mut write_clock = None;
919 for write_port in &self.write_ports {
920 if write_clock.is_none() {
921 write_clock = Some(write_port.clock);
922 } else if write_clock != Some(write_port.clock) {
923 panic!("trying to lower memory with multiple write clocks");
924 }
925 }
926 let mut output = output.clone();
927 for port_index in 0..self.read_ports.len() {
929 self.unmap_read_dff(design, port_index, &mut output);
930 }
931 let max_wide_log2 = self
932 .write_ports
933 .iter()
934 .map(|port| port.wide_log2(&self))
935 .max()
936 .unwrap_or(0)
937 .max(self.read_ports.iter().map(|port| port.wide_log2(&self)).max().unwrap_or(0));
938 let swizzle = MemorySwizzle {
940 data_swizzle: Vec::from_iter((0..self.width).map(Some)),
941 soft_addr_bits_mask: (1 << max_wide_log2) - 1,
942 write_wide_log2: vec![0; self.write_ports.len()],
943 read_wide_log2: vec![0; self.read_ports.len()],
944 hard_addr_bits: 0,
945 data_width_unit: self.width,
946 };
947 for (memory, mem_output) in self.swizzle(design, &output, &swizzle) {
948 assert_eq!(memory.depth, 1);
949 let data = if memory.write_ports.is_empty() {
950 Value::from(memory.init_value)
951 } else {
952 let q = design.add_void(memory.width);
953 let mut data = q.clone();
954 for port in memory.write_ports {
955 assert_eq!(port.data.len(), data.len());
956 data = design.add_bitwise_mux(port.mask, port.data, data);
958 }
959 design.replace_value(
960 &q,
961 design.add_dff(FlipFlop::new(data, write_clock.unwrap()).with_init(memory.init_value)),
962 );
963 q
964 };
965 for (port_index, port) in memory.read_ports.into_iter().enumerate() {
968 design
969 .replace_value(mem_output.slice(port_index * memory.width..(port_index + 1) * memory.width), &data);
970 assert_eq!(port.data_len, data.len());
971 }
972 }
973 }
974}
975
976trait DesignExt {
977 fn add_dedup_dff(&self, flip_flop: FlipFlop) -> Value;
978 fn add_dedup_mux(&self, arg1: impl Into<ControlNet>, arg2: impl Into<Value>, arg3: impl Into<Value>) -> Value;
979 fn add_bitwise_mux(&self, arg1: impl Into<Value>, arg2: impl Into<Value>, arg3: impl Into<Value>) -> Value;
980}
981
982impl DesignExt for Design {
983 fn add_dedup_dff(&self, mut flip_flop: FlipFlop) -> Value {
984 let mut cache = BTreeMap::new();
985 let mut result_swizzle = vec![];
986 let mut dedup_data = Value::new();
987 let mut dedup_load_data = Value::new();
988 let mut dedup_reset_value = Const::new();
989 let mut dedup_clear_value = Const::new();
990 let mut dedup_init_value = Const::new();
991
992 for ((((data_bit, load_data_bit), reset_bit), clear_bit), init_bit) in flip_flop
993 .data
994 .iter()
995 .zip(flip_flop.load_data.iter())
996 .zip(flip_flop.reset_value)
997 .zip(flip_flop.clear_value)
998 .zip(flip_flop.init_value)
999 {
1000 match cache.entry((data_bit, load_data_bit, reset_bit, clear_bit, init_bit)) {
1001 btree_map::Entry::Vacant(entry) => {
1002 let index = dedup_data.len();
1003 dedup_data.push(data_bit);
1004 dedup_load_data.push(load_data_bit);
1005 dedup_reset_value.push(reset_bit);
1006 dedup_clear_value.push(clear_bit);
1007 dedup_init_value.push(init_bit);
1008 result_swizzle.push(index);
1009 entry.insert(index);
1010 }
1011 btree_map::Entry::Occupied(entry) => {
1012 result_swizzle.push(*entry.get());
1013 }
1014 }
1015 }
1016 flip_flop.data = dedup_data;
1017 flip_flop.load_data = dedup_load_data;
1018 flip_flop.reset_value = dedup_reset_value;
1019 flip_flop.clear_value = dedup_clear_value;
1020 flip_flop.init_value = dedup_init_value;
1021 let dedup_result = self.add_dff(flip_flop);
1022 Value::from_iter(result_swizzle.iter().map(|&index| dedup_result[index]))
1023 }
1024
1025 fn add_dedup_mux(&self, arg1: impl Into<ControlNet>, arg2: impl Into<Value>, arg3: impl Into<Value>) -> Value {
1026 let arg1 = arg1.into();
1027 let arg2 = arg2.into();
1028 let arg3 = arg3.into();
1029 let mut cache = BTreeMap::new();
1030 let mut result_swizzle = vec![];
1031 let mut dedup_arg2 = Value::new();
1032 let mut dedup_arg3 = Value::new();
1033 for (arg2_bit, arg3_bit) in arg2.iter().zip(arg3) {
1034 match cache.entry((arg2_bit, arg3_bit)) {
1035 btree_map::Entry::Vacant(entry) => {
1036 let index = dedup_arg2.len();
1037 dedup_arg2.push(arg2_bit);
1038 dedup_arg3.push(arg3_bit);
1039 result_swizzle.push(index);
1040 entry.insert(index);
1041 }
1042 btree_map::Entry::Occupied(entry) => {
1043 result_swizzle.push(*entry.get());
1044 }
1045 }
1046 }
1047 let dedup_result = self.add_mux(arg1, dedup_arg2, dedup_arg3);
1048 Value::from_iter(result_swizzle.iter().map(|&index| dedup_result[index]))
1049 }
1050
1051 fn add_bitwise_mux(&self, arg1: impl Into<Value>, arg2: impl Into<Value>, arg3: impl Into<Value>) -> Value {
1052 let arg1 = arg1.into();
1053 let arg2 = arg2.into();
1054 let arg3 = arg3.into();
1055 let mut result = Value::new();
1056 let mut index = 0;
1057 assert_eq!(arg1.len(), arg2.len());
1058 assert_eq!(arg1.len(), arg3.len());
1059 while index < arg1.len() {
1060 let mut end_index = index;
1061 while end_index < arg1.len() && arg1[end_index] == arg1[index] {
1062 end_index += 1;
1063 }
1064 let arg2_chunk = arg2.slice(index..end_index);
1065 let arg3_chunk = arg3.slice(index..end_index);
1066 result.extend(self.add_dedup_mux(arg1[index], arg2_chunk, arg3_chunk));
1067 index = end_index;
1068 }
1069 result
1070 }
1071}