ICFP 2007 Contest: https://web.archive.org/web/20090301164728/https://save-endo.cs.uu.nl/
1use crate::dna::Base; 2use crate::dna::DnaIterator; 3use crate::dna::DnaRef; 4use crate::pattern::Pattern; 5use crate::pattern::PatternItem; 6use crate::rna::Rna; 7use crate::template::Template; 8use crate::template::TemplateItem; 9use std::mem; 10 11pub struct Parser<'a> { 12 iter: DnaIterator<'a>, 13 peeked: Vec<Base>, 14 index: usize, 15} 16 17impl<'a> Parser<'a> { 18 pub fn new(buf: &'a DnaRef) -> Parser<'a> { 19 Parser { 20 iter: buf.iter(), 21 peeked: Vec::new(), 22 index: 0, 23 } 24 } 25 26 pub fn index(&self) -> usize { 27 self.index 28 } 29 30 fn peek_to(&mut self, n: usize) { 31 while self.peeked.len() < n { 32 if let Some(b) = self.iter.next() { 33 self.peeked.push(b); 34 } else { 35 return; 36 } 37 } 38 } 39 40 pub fn next_is(&mut self, next: &[Base]) -> bool { 41 self.peek_to(next.len()); 42 if self.peeked.len() < next.len() { 43 return false; 44 } 45 return &self.peeked[..next.len()] == next; 46 } 47 48 pub fn advance(&mut self, n: usize) { 49 self.peek_to(n); 50 self.index += n; 51 self.peeked.drain(..n); 52 } 53 54 pub fn nat(&mut self) -> Option<usize> { 55 let mut ret = 0; 56 let mut bit = 1; 57 loop { 58 if self.next_is(&[Base::P]) { 59 self.advance(1); 60 return Some(ret); 61 } else if self.next_is(&[Base::I]) || self.next_is(&[Base::F]) { 62 self.advance(1); 63 } else if self.next_is(&[Base::C]) { 64 ret += bit; 65 self.advance(1); 66 } else { 67 return None; 68 } 69 70 bit *= 2; 71 } 72 } 73 74 pub fn consts(&mut self) -> Vec<Base> { 75 let mut ret = Vec::new(); 76 loop { 77 if self.next_is(&[Base::C]) { 78 self.advance(1); 79 ret.push(Base::I); 80 } else if self.next_is(&[Base::F]) { 81 self.advance(1); 82 ret.push(Base::C); 83 } else if self.next_is(&[Base::P]) { 84 self.advance(1); 85 ret.push(Base::F); 86 } else if self.next_is(&[Base::I, Base::C]) { 87 self.advance(2); 88 ret.push(Base::P); 89 } else { 90 return ret; 91 } 92 } 93 } 94 95 pub fn pattern(&mut self, rna: &mut Vec<Rna>) -> Option<Pattern> { 96 let mut ret = Vec::new(); 97 let mut level = 0; 98 loop { 99 if self.next_is(&[Base::C]) { 100 self.advance(1); 101 ret.push(PatternItem::Base(Base::I)); 102 } else if self.next_is(&[Base::F]) { 103 self.advance(1); 104 ret.push(PatternItem::Base(Base::C)); 105 } else if self.next_is(&[Base::P]) { 106 self.advance(1); 107 ret.push(PatternItem::Base(Base::F)); 108 } else if self.next_is(&[Base::I, Base::C]) { 109 self.advance(2); 110 ret.push(PatternItem::Base(Base::P)); 111 } else if self.next_is(&[Base::I, Base::P]) { 112 self.advance(2); 113 let n = self.nat()?; 114 ret.push(PatternItem::Skip(n)); 115 } else if self.next_is(&[Base::I, Base::F]) { 116 self.advance(3); 117 let s = self.consts(); 118 ret.push(PatternItem::Search(s)); 119 } else if self.next_is(&[Base::I, Base::I, Base::P]) { 120 self.advance(3); 121 level += 1; 122 ret.push(PatternItem::Open); 123 } else if self.next_is(&[Base::I, Base::I, Base::C]) 124 || self.next_is(&[Base::I, Base::I, Base::F]) 125 { 126 self.advance(3); 127 if level == 0 { 128 return Some(ret); 129 } 130 level -= 1; 131 ret.push(PatternItem::Close); 132 } else if self.next_is(&[Base::I, Base::I, Base::I]) { 133 self.advance(3); 134 self.peek_to(7); 135 let mut r = Vec::new(); 136 mem::swap(&mut r, &mut self.peeked); 137 rna.push(r.try_into().unwrap()); 138 self.index += 7; 139 } else { 140 return None; 141 } 142 } 143 } 144 145 pub fn template(&mut self, rna: &mut Vec<Rna>) -> Option<Template> { 146 let mut ret = Vec::new(); 147 loop { 148 if self.next_is(&[Base::C]) { 149 self.advance(1); 150 ret.push(TemplateItem::Base(Base::I)); 151 } else if self.next_is(&[Base::F]) { 152 self.advance(1); 153 ret.push(TemplateItem::Base(Base::C)); 154 } else if self.next_is(&[Base::P]) { 155 self.advance(1); 156 ret.push(TemplateItem::Base(Base::F)); 157 } else if self.next_is(&[Base::I, Base::C]) { 158 self.advance(2); 159 ret.push(TemplateItem::Base(Base::P)); 160 } else if self.next_is(&[Base::I, Base::P]) || self.next_is(&[Base::I, Base::F]) { 161 self.advance(2); 162 let l = self.nat()?; 163 let n = self.nat()?; 164 ret.push(TemplateItem::Ref(n, l)); 165 } else if self.next_is(&[Base::I, Base::I, Base::C]) 166 || self.next_is(&[Base::I, Base::I, Base::F]) 167 { 168 self.advance(3); 169 return Some(ret); 170 } else if self.next_is(&[Base::I, Base::I, Base::P]) { 171 self.advance(3); 172 let n = self.nat()?; 173 ret.push(TemplateItem::Len(n)); 174 } else if self.next_is(&[Base::I, Base::I, Base::I]) { 175 self.advance(3); 176 self.peek_to(7); 177 let mut r = Vec::new(); 178 mem::swap(&mut r, &mut self.peeked); 179 rna.push(r.try_into().unwrap()); 180 self.index += 7; 181 } else { 182 return None; 183 } 184 } 185 } 186} 187 188#[cfg(test)] 189mod tests { 190 use super::*; 191 192 #[test] 193 fn test_base() { 194 let dna = DnaRef::from_string("ICFP"); 195 let mut parser = Parser::new(&dna); 196 assert!(parser.next_is(&[Base::I])); 197 assert!(parser.next_is(&[Base::I, Base::C])); 198 assert!(parser.next_is(&[Base::I, Base::C, Base::F])); 199 assert!(parser.next_is(&[Base::I, Base::C, Base::F, Base::P])); 200 } 201 202 #[test] 203 fn test_nat() { 204 let dna = DnaRef::from_string("CP"); 205 let mut parser = Parser::new(&dna); 206 assert_eq!(parser.nat(), Some(1)); 207 assert_eq!(parser.index, 2); 208 209 let dna = DnaRef::from_string("ICICP"); 210 let mut parser = Parser::new(&dna); 211 assert_eq!(parser.nat(), Some(10)); 212 assert_eq!(parser.index, 5); 213 214 let dna = DnaRef::from_string("III"); 215 let mut parser = Parser::new(&dna); 216 assert_eq!(parser.nat(), None); 217 assert_eq!(parser.index, 3); 218 } 219 220 #[test] 221 fn test_consts() { 222 let dna = DnaRef::from_string("CFPICIIC"); 223 let mut parser = Parser::new(&dna); 224 assert_eq!(parser.consts(), &[Base::I, Base::C, Base::F, Base::P]); 225 assert_eq!(parser.index, 5); 226 } 227 228 #[test] 229 fn test_pattern() { 230 let mut rna = Vec::new(); 231 232 let dna = DnaRef::from_string("CIIC"); 233 let mut parser = Parser::new(&dna); 234 assert_eq!( 235 parser.pattern(&mut rna), 236 Some(vec![PatternItem::Base(Base::I)]) 237 ); 238 assert_eq!(parser.index, 4); 239 240 let dna = DnaRef::from_string("IIPIPICPIICICIIF"); 241 let mut parser = Parser::new(&dna); 242 assert_eq!( 243 parser.pattern(&mut rna), 244 Some(vec![ 245 PatternItem::Open, 246 PatternItem::Skip(2), 247 PatternItem::Close, 248 PatternItem::Base(Base::P) 249 ]) 250 ); 251 assert_eq!(parser.index, 16); 252 253 let dna = DnaRef::from_string("IIIPFCICFPIIC"); 254 let mut parser = Parser::new(&dna); 255 assert_eq!(parser.pattern(&mut rna), Some(vec![])); 256 assert_eq!(parser.index, 13); 257 assert_eq!( 258 rna, 259 vec![[ 260 Base::P, 261 Base::F, 262 Base::C, 263 Base::I, 264 Base::C, 265 Base::F, 266 Base::P 267 ]] 268 ); 269 } 270 271 #[test] 272 fn test_template() { 273 let mut rna = Vec::new(); 274 275 let dna = DnaRef::from_string("CFPICIFCPICPIIPIICPIIC"); 276 let mut parser = Parser::new(&dna); 277 assert_eq!( 278 parser.template(&mut rna), 279 Some(vec![ 280 TemplateItem::Base(Base::I), 281 TemplateItem::Base(Base::C), 282 TemplateItem::Base(Base::F), 283 TemplateItem::Base(Base::P), 284 TemplateItem::Ref(2, 1), 285 TemplateItem::Len(4) 286 ]) 287 ); 288 assert_eq!(parser.index, 22); 289 290 let dna = DnaRef::from_string("IIIPFCICFPIIC"); 291 let mut parser = Parser::new(&dna); 292 assert_eq!(parser.template(&mut rna), Some(vec![])); 293 assert_eq!(parser.index, 13); 294 assert_eq!( 295 rna, 296 vec![[ 297 Base::P, 298 Base::F, 299 Base::C, 300 Base::I, 301 Base::C, 302 Base::F, 303 Base::P 304 ]] 305 ); 306 } 307}