ICFP 2007 Contest: https://web.archive.org/web/20090301164728/https://save-endo.cs.uu.nl/

Dna data structure

We don't care about performance here, just about getting it working.

Changed files
+388 -1
dna2rna
+382
dna2rna/src/dna.rs
···
+
use std::collections::VecDeque;
+
use std::ops::Deref;
+
use std::rc::Rc;
+
+
#[derive(Clone)]
+
pub enum Dna {
+
Empty,
+
Leaf(char),
+
TwoNode(TwoNode),
+
ThreeNode(ThreeNode),
+
}
+
+
#[derive(Clone)]
+
// Wrap Rc in a single-value enum so I can implement traits for it.
+
pub enum DnaRef {
+
DnaRef(Rc<Dna>),
+
}
+
+
#[derive(Clone)]
+
pub struct TwoNode {
+
len: usize,
+
depth: usize,
+
children: (DnaRef, DnaRef),
+
}
+
+
#[derive(Clone)]
+
pub struct ThreeNode {
+
len: usize,
+
depth: usize,
+
children: (DnaRef, DnaRef, DnaRef),
+
}
+
+
enum ConcatState {
+
One(DnaRef),
+
Two(DnaRef, DnaRef),
+
}
+
+
impl DnaRef {
+
pub fn new() -> Self {
+
Self::DnaRef(Rc::new(Dna::Empty))
+
}
+
+
fn from_char(c: char) -> Self {
+
Self::DnaRef(Rc::new(Dna::Leaf(c)))
+
}
+
+
fn from_two_children(a: DnaRef, b: DnaRef) -> Self {
+
debug_assert_eq!(a.depth(), b.depth());
+
Self::DnaRef(Rc::new(Dna::TwoNode(TwoNode {
+
len: a.len() + b.len(),
+
depth: a.depth() + 1,
+
children: (a, b),
+
})))
+
}
+
+
fn from_three_children(a: DnaRef, b: DnaRef, c: DnaRef) -> Self {
+
debug_assert_eq!(a.depth(), b.depth());
+
debug_assert_eq!(a.depth(), c.depth());
+
Self::DnaRef(Rc::new(Dna::ThreeNode(ThreeNode {
+
len: a.len() + b.len() + c.len(),
+
depth: a.depth() + 1,
+
children: (a, b, c),
+
})))
+
}
+
+
pub fn from_string(s: &str) -> Self {
+
s.chars().map(|c| DnaRef::from_char(c)).collect::<DnaRef>()
+
}
+
+
pub fn len(&self) -> usize {
+
match &**self {
+
Dna::Empty => 0,
+
Dna::Leaf(_) => 1,
+
Dna::TwoNode(node) => node.len,
+
Dna::ThreeNode(node) => node.len,
+
}
+
}
+
+
pub fn depth(&self) -> usize {
+
match &**self {
+
Dna::Empty => 0,
+
Dna::Leaf(_) => 1,
+
Dna::TwoNode(node) => node.depth,
+
Dna::ThreeNode(node) => node.depth,
+
}
+
}
+
+
// Use Index trait instead
+
pub fn index(&self, n: usize) -> char {
+
debug_assert!(n < self.len());
+
+
match &**self {
+
Dna::Empty => unreachable!(),
+
Dna::Leaf(c) => *c,
+
Dna::TwoNode(node) => {
+
let (a, b) = &node.children;
+
if n < a.len() {
+
a.index(n)
+
} else {
+
b.index(n - a.len())
+
}
+
}
+
Dna::ThreeNode(node) => {
+
let (a, b, c) = &node.children;
+
if n < a.len() {
+
a.index(n)
+
} else if n < a.len() + b.len() {
+
b.index(n - a.len())
+
} else {
+
c.index(n - a.len() - b.len())
+
}
+
}
+
}
+
}
+
+
pub fn split(&self, n: usize) -> (DnaRef, DnaRef) {
+
debug_assert!(n <= self.len());
+
match &**self {
+
Dna::Empty => (Self::new(), Self::new()),
+
Dna::Leaf(c) => {
+
if n == 0 {
+
(Self::new(), Self::from_char(*c))
+
} else {
+
(Self::from_char(*c), Self::new())
+
}
+
}
+
Dna::TwoNode(node) => {
+
let (a, b) = &node.children;
+
if n < a.len() {
+
let (x, y) = a.split(n);
+
(x, Self::concat(y, b.clone()))
+
} else if n == a.len() {
+
(a.clone(), b.clone())
+
} else {
+
let (x, y) = b.split(n - a.len());
+
(Self::concat(a.clone(), x), y)
+
}
+
}
+
Dna::ThreeNode(node) => {
+
let (a, b, c) = &node.children;
+
if n < a.len() {
+
let (x, y) = a.split(n);
+
(x, Self::concat(Self::concat(y, b.clone()), c.clone()))
+
} else if n == a.len() {
+
(a.clone(), Self::concat(b.clone(), c.clone()))
+
} else if n - a.len() < b.len() {
+
let (x, y) = b.split(n - a.len());
+
(Self::concat(a.clone(), x), Self::concat(y, c.clone()))
+
} else if n == a.len() + b.len() {
+
(Self::concat(a.clone(), b.clone()), c.clone())
+
} else {
+
let (x, y) = c.split(n - a.len() - b.len());
+
(Self::concat(a.clone(), Self::concat(b.clone(), x)), y)
+
}
+
}
+
}
+
}
+
+
fn concat_helper(lhs: DnaRef, rhs: DnaRef) -> ConcatState {
+
if lhs.depth() == rhs.depth() {
+
ConcatState::Two(lhs, rhs)
+
} else if lhs.depth() < rhs.depth() {
+
match &*rhs {
+
Dna::Empty => unreachable!(),
+
Dna::Leaf(_) => unreachable!(),
+
Dna::TwoNode(node) => {
+
let (a, b) = &node.children;
+
match Self::concat_helper(lhs, a.clone()) {
+
ConcatState::One(x) => {
+
ConcatState::One(Self::from_two_children(x, b.clone()))
+
}
+
ConcatState::Two(x, y) => {
+
ConcatState::One(Self::from_three_children(x, y, b.clone()))
+
}
+
}
+
}
+
Dna::ThreeNode(node) => {
+
let (a, b, c) = &node.children;
+
match Self::concat_helper(lhs, a.clone()) {
+
ConcatState::One(x) => {
+
ConcatState::One(Self::from_three_children(x, b.clone(), c.clone()))
+
}
+
ConcatState::Two(x, y) => ConcatState::Two(
+
Self::from_two_children(x, y),
+
Self::from_two_children(b.clone(), c.clone()),
+
),
+
}
+
}
+
}
+
} else {
+
match &*lhs {
+
Dna::Empty => unreachable!(),
+
Dna::Leaf(_) => unreachable!(),
+
Dna::TwoNode(node) => {
+
let (a, b) = &node.children;
+
match Self::concat_helper(b.clone(), rhs) {
+
ConcatState::One(x) => {
+
ConcatState::One(Self::from_two_children(a.clone(), x))
+
}
+
ConcatState::Two(x, y) => {
+
ConcatState::One(Self::from_three_children(a.clone(), x, y))
+
}
+
}
+
}
+
Dna::ThreeNode(node) => {
+
let (a, b, c) = &node.children;
+
match Self::concat_helper(c.clone(), rhs) {
+
ConcatState::One(x) => {
+
ConcatState::One(Self::from_three_children(a.clone(), b.clone(), x))
+
}
+
ConcatState::Two(x, y) => ConcatState::Two(
+
Self::from_two_children(a.clone(), b.clone()),
+
Self::from_two_children(x, y),
+
),
+
}
+
}
+
}
+
}
+
}
+
+
// Use Add trait instead
+
pub fn concat(lhs: DnaRef, rhs: DnaRef) -> Self {
+
match Self::concat_helper(lhs, rhs) {
+
ConcatState::One(a) => a,
+
ConcatState::Two(a, b) => Self::from_two_children(a, b),
+
}
+
}
+
+
pub fn iter<'a>(&'a self) -> DnaIterator<'a> {
+
let mut stack = Vec::new();
+
stack.push(self);
+
DnaIterator { stack }
+
}
+
}
+
+
impl Deref for DnaRef {
+
type Target = Dna;
+
+
fn deref(&self) -> &Self::Target {
+
match self {
+
DnaRef::DnaRef(r) => &*r,
+
}
+
}
+
}
+
+
struct DnaRefIter<I>
+
where
+
I: Iterator<Item = DnaRef>,
+
{
+
iter: I,
+
buf: VecDeque<DnaRef>,
+
}
+
+
impl<I> DnaRefIter<I>
+
where
+
I: Iterator<Item = DnaRef>,
+
{
+
fn new(iter: I) -> DnaRefIter<I> {
+
DnaRefIter {
+
iter: iter,
+
buf: VecDeque::with_capacity(5),
+
}
+
}
+
}
+
+
impl<I> Iterator for DnaRefIter<I>
+
where
+
I: Iterator<Item = DnaRef>,
+
{
+
type Item = DnaRef;
+
fn next(&mut self) -> Option<Self::Item> {
+
while self.buf.len() < 5 {
+
let Some(x) = self.iter.next() else {
+
break;
+
};
+
+
self.buf.push_back(x);
+
}
+
+
if self.buf.len() == 5 || self.buf.len() == 3 {
+
let a = self.buf.pop_front().unwrap();
+
let b = self.buf.pop_front().unwrap();
+
let c = self.buf.pop_front().unwrap();
+
+
return Some(DnaRef::from_three_children(a, b, c));
+
} else if self.buf.len() == 4 || self.buf.len() == 2 {
+
let a = self.buf.pop_front().unwrap();
+
let b = self.buf.pop_front().unwrap();
+
+
return Some(DnaRef::from_two_children(a, b));
+
} else {
+
return None;
+
}
+
}
+
}
+
+
impl FromIterator<DnaRef> for DnaRef {
+
fn from_iter<I: IntoIterator<Item = DnaRef>>(iter: I) -> DnaRef {
+
// We need some kind of buffer no matter what, even if we aggregate as we go.
+
// This implementation does the easy thing and just repeatedly collects into a Vec.
+
let mut cur: Vec<DnaRef> = iter.into_iter().collect::<Vec<_>>();
+
while cur.len() > 1 {
+
cur = DnaRefIter::new(cur.into_iter()).collect();
+
}
+
+
return cur.pop().unwrap();
+
}
+
}
+
+
pub struct DnaIterator<'a> {
+
stack: Vec<&'a DnaRef>,
+
}
+
+
impl<'a> Iterator for DnaIterator<'a> {
+
type Item = char;
+
+
fn next(&mut self) -> Option<Self::Item> {
+
while let Some(node) = self.stack.pop() {
+
match &**node {
+
Dna::Empty => return None,
+
Dna::Leaf(c) => return Some(*c),
+
Dna::TwoNode(node) => {
+
let (a, b) = &node.children;
+
self.stack.push(b);
+
self.stack.push(a);
+
}
+
Dna::ThreeNode(node) => {
+
let (a, b, c) = &node.children;
+
self.stack.push(c);
+
self.stack.push(b);
+
self.stack.push(a);
+
}
+
}
+
}
+
+
return None;
+
}
+
}
+
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Flattens a sequence back into a `String` through its iterator.
    fn render(dna: &DnaRef) -> String {
        dna.iter().collect()
    }

    /// A freshly created sequence holds no bases.
    #[test]
    fn test_empty() {
        assert_eq!(DnaRef::new().len(), 0);
    }

    /// Building from a string and iterating gives the same text back.
    #[test]
    fn test_to_from_string() {
        let dna = DnaRef::from_string("ICFP");
        assert_eq!(render(&dna), "ICFP");
    }

    /// Every position reports the character found there in the source text.
    #[test]
    fn test_index() {
        let source = "ICFP";
        let dna = DnaRef::from_string(source);
        for (pos, expected) in source.chars().enumerate() {
            assert_eq!(dna.index(pos), expected);
        }
    }

    /// Concatenation keeps the order of both operands.
    #[test]
    fn test_concat() {
        let joined = DnaRef::concat(DnaRef::from_string("ABCD"), DnaRef::from_string("WXYZ"));
        assert_eq!(render(&joined), "ABCDWXYZ");
    }

    /// Splitting in the middle yields the two halves of the text.
    #[test]
    fn test_split() {
        let (head, tail) = DnaRef::from_string("ABCDWXYZ").split(4);
        assert_eq!(render(&head), "ABCD");
        assert_eq!(render(&tail), "WXYZ");
    }
}
+6 -1
dna2rna/src/main.rs
···
+
mod dna;
+
+
use crate::dna::*;
+
// Program entry point: builds a `DnaRef` from the literal "ICFP" and prints it converted back to a string.
fn main() {
-
// NOTE(review): this line follows a `-` diff marker, so it appears to be the removed pre-change body — confirm.
println!("Hello, world!");
+
// Build the sequence from a fixed sample string.
let dna: DnaRef = DnaRef::from_string("ICFP");
+
// Walk the sequence with its iterator and collect the characters into a String for display.
println!("Back to string: {}", dna.iter().collect::<String>());
}