ICFP 2007 Contest: https://web.archive.org/web/20090301164728/https://save-endo.cs.uu.nl/
1use crate::dna::Base;
2use crate::dna::DnaIterator;
3use crate::dna::DnaRef;
4use crate::pattern::Pattern;
5use crate::pattern::PatternItem;
6use crate::rna::Rna;
7use crate::template::Template;
8use crate::template::TemplateItem;
9use std::mem;
10
11pub struct Parser<'a> {
12 iter: DnaIterator<'a>,
13 peeked: Vec<Base>,
14 index: usize,
15}
16
17impl<'a> Parser<'a> {
18 pub fn new(buf: &'a DnaRef) -> Parser<'a> {
19 Parser {
20 iter: buf.iter(),
21 peeked: Vec::new(),
22 index: 0,
23 }
24 }
25
26 pub fn index(&self) -> usize {
27 self.index
28 }
29
30 fn peek_to(&mut self, n: usize) {
31 while self.peeked.len() < n {
32 if let Some(b) = self.iter.next() {
33 self.peeked.push(b);
34 } else {
35 return;
36 }
37 }
38 }
39
40 pub fn next_is(&mut self, next: &[Base]) -> bool {
41 self.peek_to(next.len());
42 if self.peeked.len() < next.len() {
43 return false;
44 }
45 return &self.peeked[..next.len()] == next;
46 }
47
48 pub fn advance(&mut self, n: usize) {
49 self.peek_to(n);
50 self.index += n;
51 self.peeked.drain(..n);
52 }
53
54 pub fn nat(&mut self) -> Option<usize> {
55 let mut ret = 0;
56 let mut bit = 1;
57 loop {
58 if self.next_is(&[Base::P]) {
59 self.advance(1);
60 return Some(ret);
61 } else if self.next_is(&[Base::I]) || self.next_is(&[Base::F]) {
62 self.advance(1);
63 } else if self.next_is(&[Base::C]) {
64 ret += bit;
65 self.advance(1);
66 } else {
67 return None;
68 }
69
70 bit *= 2;
71 }
72 }
73
74 pub fn consts(&mut self) -> Vec<Base> {
75 let mut ret = Vec::new();
76 loop {
77 if self.next_is(&[Base::C]) {
78 self.advance(1);
79 ret.push(Base::I);
80 } else if self.next_is(&[Base::F]) {
81 self.advance(1);
82 ret.push(Base::C);
83 } else if self.next_is(&[Base::P]) {
84 self.advance(1);
85 ret.push(Base::F);
86 } else if self.next_is(&[Base::I, Base::C]) {
87 self.advance(2);
88 ret.push(Base::P);
89 } else {
90 return ret;
91 }
92 }
93 }
94
95 pub fn pattern(&mut self, rna: &mut Vec<Rna>) -> Option<Pattern> {
96 let mut ret = Vec::new();
97 let mut level = 0;
98 loop {
99 if self.next_is(&[Base::C]) {
100 self.advance(1);
101 ret.push(PatternItem::Base(Base::I));
102 } else if self.next_is(&[Base::F]) {
103 self.advance(1);
104 ret.push(PatternItem::Base(Base::C));
105 } else if self.next_is(&[Base::P]) {
106 self.advance(1);
107 ret.push(PatternItem::Base(Base::F));
108 } else if self.next_is(&[Base::I, Base::C]) {
109 self.advance(2);
110 ret.push(PatternItem::Base(Base::P));
111 } else if self.next_is(&[Base::I, Base::P]) {
112 self.advance(2);
113 let n = self.nat()?;
114 ret.push(PatternItem::Skip(n));
115 } else if self.next_is(&[Base::I, Base::F]) {
116 self.advance(3);
117 let s = self.consts();
118 ret.push(PatternItem::Search(s));
119 } else if self.next_is(&[Base::I, Base::I, Base::P]) {
120 self.advance(3);
121 level += 1;
122 ret.push(PatternItem::Open);
123 } else if self.next_is(&[Base::I, Base::I, Base::C])
124 || self.next_is(&[Base::I, Base::I, Base::F])
125 {
126 self.advance(3);
127 if level == 0 {
128 return Some(ret);
129 }
130 level -= 1;
131 ret.push(PatternItem::Close);
132 } else if self.next_is(&[Base::I, Base::I, Base::I]) {
133 self.advance(3);
134 self.peek_to(7);
135 let mut r = Vec::new();
136 mem::swap(&mut r, &mut self.peeked);
137 rna.push(r.try_into().unwrap());
138 self.index += 7;
139 } else {
140 return None;
141 }
142 }
143 }
144
145 pub fn template(&mut self, rna: &mut Vec<Rna>) -> Option<Template> {
146 let mut ret = Vec::new();
147 loop {
148 if self.next_is(&[Base::C]) {
149 self.advance(1);
150 ret.push(TemplateItem::Base(Base::I));
151 } else if self.next_is(&[Base::F]) {
152 self.advance(1);
153 ret.push(TemplateItem::Base(Base::C));
154 } else if self.next_is(&[Base::P]) {
155 self.advance(1);
156 ret.push(TemplateItem::Base(Base::F));
157 } else if self.next_is(&[Base::I, Base::C]) {
158 self.advance(2);
159 ret.push(TemplateItem::Base(Base::P));
160 } else if self.next_is(&[Base::I, Base::P]) || self.next_is(&[Base::I, Base::F]) {
161 self.advance(2);
162 let l = self.nat()?;
163 let n = self.nat()?;
164 ret.push(TemplateItem::Ref(n, l));
165 } else if self.next_is(&[Base::I, Base::I, Base::C])
166 || self.next_is(&[Base::I, Base::I, Base::F])
167 {
168 self.advance(3);
169 return Some(ret);
170 } else if self.next_is(&[Base::I, Base::I, Base::P]) {
171 self.advance(3);
172 let n = self.nat()?;
173 ret.push(TemplateItem::Len(n));
174 } else if self.next_is(&[Base::I, Base::I, Base::I]) {
175 self.advance(3);
176 self.peek_to(7);
177 let mut r = Vec::new();
178 mem::swap(&mut r, &mut self.peeked);
179 rna.push(r.try_into().unwrap());
180 self.index += 7;
181 } else {
182 return None;
183 }
184 }
185 }
186}
187
188#[cfg(test)]
189mod tests {
190 use super::*;
191
192 #[test]
193 fn test_base() {
194 let dna = DnaRef::from_string("ICFP");
195 let mut parser = Parser::new(&dna);
196 assert!(parser.next_is(&[Base::I]));
197 assert!(parser.next_is(&[Base::I, Base::C]));
198 assert!(parser.next_is(&[Base::I, Base::C, Base::F]));
199 assert!(parser.next_is(&[Base::I, Base::C, Base::F, Base::P]));
200 }
201
202 #[test]
203 fn test_nat() {
204 let dna = DnaRef::from_string("CP");
205 let mut parser = Parser::new(&dna);
206 assert_eq!(parser.nat(), Some(1));
207 assert_eq!(parser.index, 2);
208
209 let dna = DnaRef::from_string("ICICP");
210 let mut parser = Parser::new(&dna);
211 assert_eq!(parser.nat(), Some(10));
212 assert_eq!(parser.index, 5);
213
214 let dna = DnaRef::from_string("III");
215 let mut parser = Parser::new(&dna);
216 assert_eq!(parser.nat(), None);
217 assert_eq!(parser.index, 3);
218 }
219
220 #[test]
221 fn test_consts() {
222 let dna = DnaRef::from_string("CFPICIIC");
223 let mut parser = Parser::new(&dna);
224 assert_eq!(parser.consts(), &[Base::I, Base::C, Base::F, Base::P]);
225 assert_eq!(parser.index, 5);
226 }
227
228 #[test]
229 fn test_pattern() {
230 let mut rna = Vec::new();
231
232 let dna = DnaRef::from_string("CIIC");
233 let mut parser = Parser::new(&dna);
234 assert_eq!(
235 parser.pattern(&mut rna),
236 Some(vec![PatternItem::Base(Base::I)])
237 );
238 assert_eq!(parser.index, 4);
239
240 let dna = DnaRef::from_string("IIPIPICPIICICIIF");
241 let mut parser = Parser::new(&dna);
242 assert_eq!(
243 parser.pattern(&mut rna),
244 Some(vec![
245 PatternItem::Open,
246 PatternItem::Skip(2),
247 PatternItem::Close,
248 PatternItem::Base(Base::P)
249 ])
250 );
251 assert_eq!(parser.index, 16);
252
253 let dna = DnaRef::from_string("IIIPFCICFPIIC");
254 let mut parser = Parser::new(&dna);
255 assert_eq!(parser.pattern(&mut rna), Some(vec![]));
256 assert_eq!(parser.index, 13);
257 assert_eq!(
258 rna,
259 vec![[
260 Base::P,
261 Base::F,
262 Base::C,
263 Base::I,
264 Base::C,
265 Base::F,
266 Base::P
267 ]]
268 );
269 }
270
271 #[test]
272 fn test_template() {
273 let mut rna = Vec::new();
274
275 let dna = DnaRef::from_string("CFPICIFCPICPIIPIICPIIC");
276 let mut parser = Parser::new(&dna);
277 assert_eq!(
278 parser.template(&mut rna),
279 Some(vec![
280 TemplateItem::Base(Base::I),
281 TemplateItem::Base(Base::C),
282 TemplateItem::Base(Base::F),
283 TemplateItem::Base(Base::P),
284 TemplateItem::Ref(2, 1),
285 TemplateItem::Len(4)
286 ])
287 );
288 assert_eq!(parser.index, 22);
289
290 let dna = DnaRef::from_string("IIIPFCICFPIIC");
291 let mut parser = Parser::new(&dna);
292 assert_eq!(parser.template(&mut rna), Some(vec![]));
293 assert_eq!(parser.index, 13);
294 assert_eq!(
295 rna,
296 vec![[
297 Base::P,
298 Base::F,
299 Base::C,
300 Base::I,
301 Base::C,
302 Base::F,
303 Base::P
304 ]]
305 );
306 }
307}