this repo has no description
1/* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */
2
3/*
4 * Main authors:
5 * Guido Tack <guido.tack@monash.edu>
6 */
7
8/* This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
11
12#include <minizinc/json_parser.hh>
13
14#include <fstream>
15#include <sstream>
16
17using namespace std;
18
19namespace MiniZinc {
20
21class JSONParser::Token {
22public:
23 TokenT t;
24
25protected:
26 Token(TokenT t0) : t(t0) {}
27
28public:
29 Token(void) : t(T_EOF) {}
30 std::string s;
31 int i;
32 double d;
33 bool b;
34 Token(std::string s0) : t(T_STRING), s(s0) {}
35 Token(int i0) : t(T_INT), i(i0), d(i0) {}
36 Token(double d0) : t(T_FLOAT), d(d0) {}
37 Token(bool b0) : t(T_BOOL), i(b0), d(b0), b(b0) {}
38 static Token listOpen() { return Token(T_LIST_OPEN); }
39 static Token listClose() { return Token(T_LIST_CLOSE); }
40 static Token objOpen() { return Token(T_OBJ_OPEN); }
41 static Token objClose() { return Token(T_OBJ_CLOSE); }
42 static Token comma() { return Token(T_COMMA); }
43 static Token colon() { return Token(T_COLON); }
44 static Token eof() { return Token(T_EOF); }
45 static Token null() { return Token(T_NULL); }
46 string toString(void) {
47 switch (t) {
48 case T_LIST_OPEN:
49 return "[";
50 case T_LIST_CLOSE:
51 return "]";
52 case T_OBJ_OPEN:
53 return "{";
54 case T_OBJ_CLOSE:
55 return "}";
56 case T_COMMA:
57 return ",";
58 case T_COLON:
59 return ":";
60 case T_STRING:
61 return "\"" + s + "\"";
62 case T_INT: {
63 std::stringstream ss;
64 ss << i;
65 return ss.str();
66 }
67 case T_FLOAT: {
68 std::stringstream ss;
69 ss << d;
70 return ss.str();
71 }
72 case T_BOOL:
73 return b ? "true" : "false";
74 case T_NULL:
75 return "null";
76 case T_EOF:
77 return "eof";
78 }
79 }
80};
81
82Location JSONParser::errLocation(void) const {
83 Location loc(filename, line, column, line, column);
84 return loc;
85}
86
87JSONParser::Token JSONParser::readToken(istream& is) {
88 string result;
89 char buf[1];
90 enum { S_NOTHING, S_STRING, S_STRING_ESCAPE, S_INT, S_FLOAT } state;
91 state = S_NOTHING;
92 while (is.good()) {
93 is.read(buf, sizeof(buf));
94 column += sizeof(buf);
95 if (is.eof()) return Token::eof();
96 if (!is.good()) throw JSONError(env, errLocation(), "tokenization failed");
97 switch (state) {
98 case S_NOTHING:
99 switch (buf[0]) {
100 case '\n':
101 line++;
102 column = 0;
103 // fall through
104 case ' ':
105 case '\t':
106 case '\r':
107 break;
108 case '[':
109 return Token::listOpen();
110 case ']':
111 return Token::listClose();
112 case '{':
113 return Token::objOpen();
114 case '}':
115 return Token::objClose();
116 case ',':
117 return Token::comma();
118 case ':':
119 return Token::colon();
120 case '"':
121 result = "";
122 state = S_STRING;
123 break;
124 case 't': {
125 char rest[3];
126 is.read(rest, sizeof(rest));
127 column += sizeof(rest);
128 if (!is.good() || std::strncmp(rest, "rue", 3) != 0)
129 throw JSONError(env, errLocation(), "unexpected token `" + string(rest) + "'");
130 state = S_NOTHING;
131 return Token(true);
132 } break;
133 case 'f': {
134 char rest[4];
135 is.read(rest, sizeof(rest));
136 column += sizeof(rest);
137 if (!is.good() || std::strncmp(rest, "alse", 4) != 0)
138 throw JSONError(env, errLocation(), "unexpected token `" + string(rest) + "'");
139 state = S_NOTHING;
140 return Token(false);
141 } break;
142 case 'n': {
143 char rest[3];
144 is.read(rest, sizeof(rest));
145 column += sizeof(rest);
146 if (!is.good() || std::strncmp(rest, "ull", 3) != 0)
147 throw JSONError(env, errLocation(), "unexpected token `" + string(rest) + "'");
148 state = S_NOTHING;
149 return Token::null();
150 } break;
151 default:
152 if ((buf[0] >= '0' && buf[0] <= '9') || (buf[0] == '-')) {
153 result = buf[0];
154 state = S_INT;
155 } else {
156 throw JSONError(env, errLocation(), "unexpected token `" + string(1, buf[0]) + "'");
157 }
158 break;
159 }
160 break;
161 case S_STRING_ESCAPE:
162 switch (buf[0]) {
163 case 'n':
164 result += "\n";
165 break;
166 case 't':
167 result += "\t";
168 break;
169 case '"':
170 result += "\"";
171 break;
172 case '\\':
173 result += "\\";
174 break;
175 default:
176 result += "\\";
177 result += buf[0];
178 break;
179 }
180 state = S_STRING;
181 break;
182 case S_STRING:
183 if (buf[0] == '"') {
184 state = S_NOTHING;
185 return Token(result);
186 }
187 if (buf[0] == '\\') {
188 state = S_STRING_ESCAPE;
189 } else {
190 result += buf[0];
191 }
192 break;
193 case S_INT:
194 if (buf[0] == '.') {
195 result += buf[0];
196 state = S_FLOAT;
197 } else if (buf[0] >= '0' && buf[0] <= '9') {
198 result += buf[0];
199 } else {
200 is.unget();
201 std::istringstream iss(result);
202 int v;
203 iss >> v;
204 state = S_NOTHING;
205 return Token(v);
206 }
207 break;
208 case S_FLOAT:
209 if (buf[0] >= '0' && buf[0] <= '9') {
210 result += buf[0];
211 } else {
212 is.unget();
213 std::istringstream iss(result);
214 double v;
215 iss >> v;
216 state = S_NOTHING;
217 return Token(v);
218 }
219 break;
220 }
221 }
222 throw JSONError(env, errLocation(), "unexpected token `" + string(result) + "'");
223}
224
225void JSONParser::expectToken(istream& is, JSONParser::TokenT t) {
226 Token rt = readToken(is);
227 if (rt.t != t) {
228 throw JSONError(env, errLocation(), "unexpected token");
229 }
230}
231
232string JSONParser::expectString(istream& is) {
233 Token rt = readToken(is);
234 if (rt.t != T_STRING) {
235 throw JSONError(env, errLocation(), "unexpected token, expected string");
236 }
237 return rt.s;
238}
239
240JSONParser::Token JSONParser::parseEnumString(istream& is) {
241 Token next = readToken(is);
242 if (next.t != T_STRING) {
243 throw JSONError(env, errLocation(), "invalid enum object");
244 }
245 if (next.s.empty()) {
246 throw JSONError(env, errLocation(), "invalid enum identifier");
247 }
248 size_t nonIdChar =
249 next.s.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_");
250 size_t nonIdBegin = next.s.find_first_of("0123456789_");
251 if (nonIdChar != std::string::npos || nonIdBegin == 0) {
252 next.s = "'" + next.s + "'";
253 }
254 return next;
255}
256
257Expression* JSONParser::parseObject(istream& is) {
258 // precondition: found T_OBJ_OPEN
259 Token objid = readToken(is);
260 if (objid.t != T_STRING) throw JSONError(env, errLocation(), "invalid object");
261 expectToken(is, T_COLON);
262 if (objid.s == "set") {
263 expectToken(is, T_LIST_OPEN);
264 vector<Token> elems;
265 TokenT listT = T_COLON; // dummy marker
266 for (Token next = readToken(is); next.t != T_LIST_CLOSE; next = readToken(is)) {
267 switch (next.t) {
268 case T_COMMA:
269 break;
270 case T_INT:
271 if (listT == T_STRING || listT == T_OBJ_OPEN)
272 throw JSONError(env, errLocation(), "invalid set literal");
273 if (listT != T_FLOAT) listT = T_INT;
274 elems.push_back(next);
275 break;
276 case T_FLOAT:
277 if (listT == T_STRING || listT == T_OBJ_OPEN)
278 throw JSONError(env, errLocation(), "invalid set literal");
279 listT = T_FLOAT;
280 elems.push_back(next);
281 break;
282 case T_STRING:
283 if (listT != T_COLON && listT != T_STRING)
284 throw JSONError(env, errLocation(), "invalid set literal");
285 listT = T_STRING;
286 elems.push_back(next);
287 break;
288 case T_BOOL:
289 if (listT == T_STRING || listT == T_OBJ_OPEN)
290 throw JSONError(env, errLocation(), "invalid set literal");
291 if (listT == T_COLON) listT = T_BOOL;
292 elems.push_back(next);
293 break;
294 case T_OBJ_OPEN: {
295 if (listT != T_COLON && listT != T_OBJ_OPEN)
296 throw JSONError(env, errLocation(), "invalid set literal");
297 listT = T_OBJ_OPEN;
298 Token enumid = readToken(is);
299 if (enumid.t != T_STRING || enumid.s != "e")
300 throw JSONError(env, errLocation(), "invalid enum object");
301 expectToken(is, T_COLON);
302 Token next = parseEnumString(is);
303 expectToken(is, T_OBJ_CLOSE);
304 elems.push_back(next);
305 break;
306 }
307 default:
308 throw JSONError(env, errLocation(), "invalid set literal");
309 }
310 }
311 expectToken(is, T_OBJ_CLOSE);
312 vector<Expression*> elems_e(elems.size());
313 switch (listT) {
314 case T_COLON:
315 break;
316 case T_BOOL:
317 for (unsigned int i = 0; i < elems.size(); i++) {
318 elems_e[i] = new BoolLit(Location().introduce(), elems[i].b);
319 }
320 break;
321 case T_INT:
322 for (unsigned int i = 0; i < elems.size(); i++) {
323 elems_e[i] = IntLit::a(elems[i].i);
324 }
325 break;
326 case T_FLOAT:
327 for (unsigned int i = 0; i < elems.size(); i++) {
328 elems_e[i] = FloatLit::a(elems[i].d);
329 }
330 break;
331 case T_STRING:
332 for (unsigned int i = 0; i < elems.size(); i++) {
333 elems_e[i] = new StringLit(Location().introduce(), elems[i].s);
334 }
335 break;
336 case T_OBJ_OPEN:
337 for (unsigned int i = 0; i < elems.size(); i++) {
338 elems_e[i] = new Id(Location().introduce(), ASTString(elems[i].s), NULL);
339 }
340 break;
341 default:
342 break;
343 }
344 return new SetLit(Location().introduce(), elems_e);
345 } else if (objid.s == "e") {
346 Token next = parseEnumString(is);
347 expectToken(is, T_OBJ_CLOSE);
348 return new Id(Location().introduce(), ASTString(next.s), NULL);
349 } else {
350 throw JSONError(env, errLocation(), "invalid object");
351 }
352}
353
354ArrayLit* JSONParser::parseArray(std::istream& is) {
355 // precondition: opening parenthesis has been read
356 vector<Expression*> exps;
357 vector<pair<int, int> > dims;
358 dims.emplace_back(1, 0);
359 vector<bool> hadDim;
360 hadDim.push_back(false);
361 Token next;
362 for (;;) {
363 next = readToken(is);
364 if (next.t != T_LIST_OPEN) break;
365 dims.emplace_back(1, 0);
366 hadDim.push_back(false);
367 }
368 int curDim = static_cast<int>(dims.size()) - 1;
369 for (;;) {
370 switch (next.t) {
371 case T_LIST_CLOSE:
372 hadDim[curDim] = true;
373 curDim--;
374 if (curDim < 0) {
375 goto list_done;
376 } else if (!hadDim[curDim]) {
377 dims[curDim].second++;
378 }
379 break;
380 case T_LIST_OPEN:
381 curDim++;
382 break;
383 case T_COMMA:
384 break;
385 case T_INT:
386 if (!hadDim[curDim]) {
387 dims[curDim].second++;
388 }
389 exps.push_back(IntLit::a(next.i));
390 break;
391 case T_FLOAT:
392 if (!hadDim[curDim]) {
393 dims[curDim].second++;
394 }
395 exps.push_back(FloatLit::a(next.d));
396 break;
397 case T_STRING:
398 if (!hadDim[curDim]) {
399 dims[curDim].second++;
400 }
401 exps.push_back(new StringLit(Location().introduce(), next.s));
402 break;
403 case T_BOOL:
404 if (!hadDim[curDim]) {
405 dims[curDim].second++;
406 }
407 exps.push_back(new BoolLit(Location().introduce(), next.b));
408 break;
409 case T_NULL:
410 if (!hadDim[curDim]) {
411 dims[curDim].second++;
412 }
413 exps.push_back(constants().absent);
414 break;
415 case T_OBJ_OPEN:
416 if (!hadDim[curDim]) {
417 dims[curDim].second++;
418 }
419 exps.push_back(parseObject(is));
420 break;
421 default:
422 throw JSONError(env, errLocation(), "cannot parse JSON file");
423 break;
424 }
425 next = readToken(is);
426 }
427list_done:
428 unsigned int expectedSize = 1;
429 for (auto& d : dims) {
430 expectedSize *= d.second;
431 }
432 if (exps.size() != expectedSize) {
433 throw JSONError(env, errLocation(), "mismatch in array dimensions");
434 /// TODO: check each individual sub-array
435 }
436 return new ArrayLit(Location().introduce(), exps, dims);
437}
438
439Expression* JSONParser::parseExp(std::istream& is) {
440 Token next = readToken(is);
441 switch (next.t) {
442 case T_INT:
443 return IntLit::a(next.i);
444 break;
445 case T_FLOAT:
446 return FloatLit::a(next.d);
447 case T_STRING:
448 return new StringLit(Location().introduce(), next.s);
449 case T_BOOL:
450 return new BoolLit(Location().introduce(), next.b);
451 case T_NULL:
452 return constants().absent;
453 case T_OBJ_OPEN:
454 return parseObject(is);
455 case T_LIST_OPEN:
456 return parseArray(is);
457 default:
458 throw JSONError(env, errLocation(), "cannot parse JSON file");
459 break;
460 }
461}
462
463void JSONParser::parse(Model* m, std::istream& is) {
464 line = 0;
465 column = 0;
466 expectToken(is, T_OBJ_OPEN);
467 for (;;) {
468 string ident = expectString(is);
469 expectToken(is, T_COLON);
470 Expression* e = parseExp(is);
471 if (ident[0] != '_') {
472 AssignI* ai = new AssignI(Location().introduce(), ident, e);
473 m->addItem(ai);
474 }
475 Token next = readToken(is);
476 if (next.t == T_OBJ_CLOSE) break;
477 if (next.t != T_COMMA) throw JSONError(env, errLocation(), "cannot parse JSON file");
478 }
479}
480
481void JSONParser::parse(Model* m, const std::string& filename0) {
482 filename = filename0;
483 ifstream is;
484 is.open(filename, ios::in);
485 if (!is.good()) {
486 throw JSONError(env, Location().introduce(), "cannot open file " + filename);
487 }
488 parse(m, is);
489}
490
491void JSONParser::parseFromString(Model* m, const std::string& data) {
492 istringstream iss(data);
493 line = 0;
494 column = 0;
495 parse(m, iss);
496}
497
namespace {
/// Return whether the first character of \a is that is not a space, tab,
/// carriage return or newline is an opening brace
bool isJSON(std::istream& is) {
  for (;;) {
    if (!is.good()) {
      return false;
    }
    const char ch = is.get();
    switch (ch) {
      case '{':
        return true;
      case ' ':
      case '\n':
      case '\t':
      case '\r':
        // leading whitespace: keep looking
        continue;
      default:
        return false;
    }
  }
}
}  // namespace
508
509bool JSONParser::stringIsJSON(const std::string& data) {
510 std::istringstream iss(data);
511 return isJSON(iss);
512}
513
514bool JSONParser::fileIsJSON(const std::string& filename) {
515 ifstream is;
516 is.open(filename, ios::in);
517 return isJSON(is);
518}
519
520} // namespace MiniZinc