this repo has no description
1/* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */
2
3/*
4 * Main authors:
5 * Jip J. Dekker <jip.dekker@monash.edu>
6 */
7
8/* This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
11
12%define api.header.include {<minizinc/support/regex_parser.tab.hh>}
13
14%{
15#include <cstdlib>
16#include <cstdio>
17#include <memory>
18#include <unordered_set>
19#include <unordered_map>
20
21#include <gecode/minimodel.hh>
22#include <minizinc/values.hh>
23using namespace Gecode;
24using namespace MiniZinc;
25
26typedef struct yy_buffer_state *YY_BUFFER_STATE;
27YY_BUFFER_STATE regex_yy_scan_string ( const char* yy_str );
28
29extern int yylex();
30extern FILE* yyin;
31
32typedef struct REContext{
33 REG* expr;
34 const IntSetVal& dom;
35 const std::unordered_map<std::string, int>& idMap;
36} REContext;
37
38void yyerror(REContext& ctx, const char* s);
39%}
40
41%union {
42 int iValue;
43 char* sValue;
44 std::unordered_set<int>* setValue;
45 Gecode::REG* rValue;
46}
47%parse-param {REContext& ctx}
48
49%token<iValue> R_INTEGER
50%token R_GROUP_OPEN "("
51%token R_GROUP_CLOSE ")"
52%token R_STAR "*"
53%token R_PLUS "+"
54%token R_ANY "."
55%token R_UNION "|"
56%token R_OPTIONAL "?"
57%token R_QUANT_OPEN "{"
58%token R_QUANT_CLOSE "}"
59%token R_COMMA ","
60%token R_CLASS_OPEN "["
61%token R_CLASS_CLOSE "]"
62%token R_CLASS_RANGE "-"
63%token R_CLASS_NEG "^"
64%token<sValue> R_IDENTIFIER
65
66%type<iValue> literal
67%type<rValue> regex expression term factor atom
68%type<setValue> set_item set_items
69
70%start regex
71
72%%
73
74regex:
75 expression
76 {
77 *ctx.expr = (*$1);
78 delete $1;
79 }
80
81expression:
82 term
83 | term "|" expression
84 {
85 *$1 = *$1 | *$3;
86 delete $3;
87 $$ = $1;
88 }
89
90term:
91 factor
92 | factor term
93 {
94 *$1 = *$1 + *$2;
95 delete $2;
96 $$ = $1;
97 }
98
99factor:
100 atom
101 | atom "*"
102 {
103 *$1 = *(*$1);
104 $$ = $1;
105 }
106 | atom "+"
107 {
108 *$1 = +(*$1);
109 $$ = $1;
110 }
111 | atom "?"
112 {
113 *$1 = (*$1)(0, 1);
114 $$ = $1;
115 }
116 | atom "{" R_INTEGER "}"
117 {
118 *$1 = (*$1)($3, $3);
119 $$ = $1;
120 }
121 | atom "{" R_INTEGER "," "}"
122 {
123 *$1 = (*$1)($3);
124 $$ = $1;
125 }
126 | atom "{" R_INTEGER "," R_INTEGER "}"
127 {
128 *$1 = (*$1)($3, $5);
129 $$ = $1;
130 }
131
132atom:
133 literal
134 { $$ = new REG($1); }
135 | "."
136 {
137 IntArgs range = IntArgs::create(ctx.dom.max().toInt() - ctx.dom.min().toInt() + 1, ctx.dom.min().toInt());
138 $$ = new REG(range);
139 }
140 | "[" set_items "]"
141 {
142 std::vector<int> v;
143 v.reserve($2->size());
144 for(auto i : *$2) {
145 v.push_back(i);
146 }
147 delete $2;
148 $$ = new REG(IntArgs(v));
149 }
150 | "[" "^" set_items "]"
151 {
152 std::vector<int> diff;
153 std::unordered_set<int> domain;
154 for(int i = ctx.dom.min().toInt(); i<=ctx.dom.max().toInt(); ++i) {
155 domain.insert(i);
156 }
157 std::set_difference(
158 domain.begin(), domain.end(),
159 $3->begin(), $3->end(),
160 std::inserter(diff, diff.begin())
161 );
162 delete $3;
163 $$ = new REG(IntArgs(diff));
164 }
165 | "(" expression ")"
166 { $$ = $2; }
167
168set_items:
169 set_item
170 | set_item set_items
171 {
172 $$ = $1;
173 for (auto i : *$2) {
174 $1->insert(i);
175 }
176 delete $2;
177 }
178
179set_item:
180 literal
181 {
182 $$ = new std::unordered_set<int>({$1});
183 }
184 | literal "-" literal
185 {
186 int from = $1;
187 int to = $3;
188 if (to < from) {
189 std::swap(from,to);
190 }
191 $$ = new std::unordered_set<int>;
192 for(int i = from; i<=to; ++i) {
193 $$->insert(i);
194 }
195 }
196
197literal:
198 R_INTEGER
199 | R_IDENTIFIER
200 {
201 std::string s($1);
202 auto find = ctx.idMap.find(s);
203 if (find == ctx.idMap.end()) {
204 throw std::runtime_error("Unknown identifier: " + s);
205 }
206 $$ = find->second;
207 }
208
209%%
210
211void yyerror(REContext& ctx, const char* s) {
212 // TODO: Add error location
213 throw std::runtime_error("Cannot parse regular expression: " + std::string(s));
214}
215
216std::unique_ptr<REG> regex_from_string(const std::string& regex_str, const IntSetVal& domain, const std::unordered_map<std::string, int>& identifiers) {
217 REG* expr = new REG();
218 regex_yy_scan_string(regex_str.c_str());
219 REContext rctx = REContext{expr, domain, identifiers};
220 int err = yyparse(rctx);
221 if (err != 0) {
222 throw std::runtime_error("Cannot parse regular expression, error code " + std::to_string(err));
223 }
224 return std::unique_ptr<REG>(expr);
225}