ICFP 2007 Contest: https://web.archive.org/web/20090301164728/https://save-endo.cs.uu.nl/

Parsing with iterators is roughly twice as fast

Changed files
+60 -45
dna2rna
src
+60 -45
dna2rna/src/parser.rs
···
use crate::dna::Base;
+
use crate::dna::DnaIterator;
use crate::dna::DnaRef;
use crate::pattern::Pattern;
use crate::pattern::PatternItem;
use crate::rna::Rna;
use crate::template::Template;
use crate::template::TemplateItem;
+
use std::mem;
/// Parser state over a DNA sequence: an iterator supplying bases plus a
/// buffer of bases that have been looked at but not yet consumed.
pub struct Parser<'a> {
-
buf: &'a DnaRef,
+
/// Iterator obtained from the DNA handed to `new`; supplies further bases on demand.
iter: DnaIterator<'a>,
+
/// Bases already pulled from `iter` that `advance` has not yet dropped.
peeked: Vec<Base>,
/// Counter of bases consumed so far; exposed through `index()`.
index: usize,
}
impl<'a> Parser<'a> {
/// Creates a parser over `buf` with an empty lookahead buffer and `index` 0.
pub fn new(buf: &'a DnaRef) -> Parser<'a> {
-
Parser { buf: buf, index: 0 }
+
Parser {
+
// Bases are pulled from this iterator only when lookahead is requested.
iter: buf.iter(),
+
// Nothing has been peeked yet.
peeked: Vec::new(),
+
// No bases have been consumed yet.
index: 0,
+
}
}
/// Returns the current value of the parser's `index` counter.
pub fn index(&self) -> usize {
    self.index
}
-
pub fn next_is(&mut self, next: &[Base]) -> bool {
-
if self.index + next.len() > self.buf.len() {
-
return false;
+
fn peek_to(&mut self, n: usize) {
+
while self.peeked.len() < n {
+
if let Some(b) = self.iter.next() {
+
self.peeked.push(b);
+
} else {
+
return;
+
}
}
+
}
-
for i in 0..next.len() {
-
if self.buf[self.index + i] != next[i] {
-
return false;
-
}
-
}
+
/// Reports whether the upcoming bases begin with `next`, without consuming them.
///
/// Returns `false` when fewer than `next.len()` bases remain.
pub fn next_is(&mut self, next: &[Base]) -> bool {
    // Make sure enough lookahead is buffered to compare against `next`.
    self.peek_to(next.len());
    // `starts_with` is false when the buffer is shorter than `next`,
    // which covers the end-of-input case.
    self.peeked.starts_with(next)
}
-
return true;
+
pub fn advance(&mut self, n: usize) {
+
self.peek_to(n);
+
self.index += n;
+
self.peeked.drain(..n);
}
pub fn nat(&mut self) -> Option<usize> {
···
let mut bit = 1;
loop {
if self.next_is(&[Base::P]) {
-
self.index += 1;
+
self.advance(1);
return Some(ret);
} else if self.next_is(&[Base::I]) || self.next_is(&[Base::F]) {
-
self.index += 1;
+
self.advance(1);
} else if self.next_is(&[Base::C]) {
ret += bit;
-
self.index += 1;
+
self.advance(1);
} else {
return None;
}
···
let mut ret = Vec::new();
loop {
if self.next_is(&[Base::C]) {
-
self.index += 1;
+
self.advance(1);
ret.push(Base::I);
} else if self.next_is(&[Base::F]) {
-
self.index += 1;
+
self.advance(1);
ret.push(Base::C);
} else if self.next_is(&[Base::P]) {
-
self.index += 1;
+
self.advance(1);
ret.push(Base::F);
} else if self.next_is(&[Base::I, Base::C]) {
-
self.index += 2;
+
self.advance(2);
ret.push(Base::P);
} else {
return ret;
···
let mut level = 0;
loop {
if self.next_is(&[Base::C]) {
-
self.index += 1;
+
self.advance(1);
ret.push(PatternItem::Base(Base::I));
} else if self.next_is(&[Base::F]) {
-
self.index += 1;
+
self.advance(1);
ret.push(PatternItem::Base(Base::C));
} else if self.next_is(&[Base::P]) {
-
self.index += 1;
+
self.advance(1);
ret.push(PatternItem::Base(Base::F));
} else if self.next_is(&[Base::I, Base::C]) {
-
self.index += 2;
+
self.advance(2);
ret.push(PatternItem::Base(Base::P));
} else if self.next_is(&[Base::I, Base::P]) {
-
self.index += 2;
+
self.advance(2);
let n = self.nat()?;
ret.push(PatternItem::Skip(n));
} else if self.next_is(&[Base::I, Base::F]) {
-
self.index += 3;
+
self.advance(3);
let s = self.consts();
ret.push(PatternItem::Search(s));
} else if self.next_is(&[Base::I, Base::I, Base::P]) {
-
self.index += 3;
+
self.advance(3);
level += 1;
ret.push(PatternItem::Open);
} else if self.next_is(&[Base::I, Base::I, Base::C])
|| self.next_is(&[Base::I, Base::I, Base::F])
{
-
self.index += 3;
+
self.advance(3);
if level == 0 {
return Some(ret);
}
level -= 1;
ret.push(PatternItem::Close);
} else if self.next_is(&[Base::I, Base::I, Base::I]) {
-
self.index += 3;
-
let mut new_rna = Vec::new();
-
for i in 0..7 {
-
new_rna.push(self.buf[self.index + i]);
-
}
+
self.advance(3);
+
self.peek_to(7);
+
let mut r = Vec::new();
+
mem::swap(&mut r, &mut self.peeked);
+
rna.push(r.try_into().unwrap());
self.index += 7;
-
rna.push(new_rna.try_into().unwrap());
} else {
return None;
}
···
let mut ret = Vec::new();
loop {
if self.next_is(&[Base::C]) {
-
self.index += 1;
+
self.advance(1);
ret.push(TemplateItem::Base(Base::I));
} else if self.next_is(&[Base::F]) {
-
self.index += 1;
+
self.advance(1);
ret.push(TemplateItem::Base(Base::C));
} else if self.next_is(&[Base::P]) {
-
self.index += 1;
+
self.advance(1);
ret.push(TemplateItem::Base(Base::F));
} else if self.next_is(&[Base::I, Base::C]) {
-
self.index += 2;
+
self.advance(2);
ret.push(TemplateItem::Base(Base::P));
} else if self.next_is(&[Base::I, Base::P]) || self.next_is(&[Base::I, Base::F]) {
-
self.index += 2;
+
self.advance(2);
let l = self.nat()?;
let n = self.nat()?;
ret.push(TemplateItem::Ref(n, l));
} else if self.next_is(&[Base::I, Base::I, Base::C])
|| self.next_is(&[Base::I, Base::I, Base::F])
{
-
self.index += 3;
+
self.advance(3);
return Some(ret);
} else if self.next_is(&[Base::I, Base::I, Base::P]) {
-
self.index += 3;
+
self.advance(3);
let n = self.nat()?;
ret.push(TemplateItem::Len(n));
} else if self.next_is(&[Base::I, Base::I, Base::I]) {
-
self.index += 3;
-
let mut new_rna = Vec::new();
-
for i in 0..7 {
-
new_rna.push(self.buf[self.index + i]);
-
}
+
self.advance(3);
+
self.peek_to(7);
+
let mut r = Vec::new();
+
mem::swap(&mut r, &mut self.peeked);
+
rna.push(r.try_into().unwrap());
self.index += 7;
-
rna.push(new_rna.try_into().unwrap());
} else {
return None;
}