this repo has no description
1/* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */
2
3/*
4 * Main authors:
5 * Guido Tack <guido.tack@monash.edu>
6 */
7
8/* This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
11
12#include <minizinc/file_utils.hh>
13#include <minizinc/iter.hh>
14#include <minizinc/json_parser.hh>
15
16#include <fstream>
17#include <sstream>
18#include <utility>
19
20using namespace std;
21
22namespace MiniZinc {
23
24class JSONParser::Token {
25public:
26 TokenT t;
27
28protected:
29 Token(TokenT t0) : t(t0) {}
30
31public:
32 Token() : t(T_EOF) {}
33 std::string s;
34 int i;
35 double d;
36 bool b;
37 Token(std::string s0) : t(T_STRING), s(std::move(s0)) {}
38 Token(int i0) : t(T_INT), i(i0), d(i0) {}
39 Token(double d0) : t(T_FLOAT), d(d0) {}
40 Token(bool b0) : t(T_BOOL), i(static_cast<int>(b0)), d(static_cast<double>(b0)), b(b0) {}
41 static Token listOpen() { return Token(T_LIST_OPEN); }
42 static Token listClose() { return Token(T_LIST_CLOSE); }
43 static Token objOpen() { return Token(T_OBJ_OPEN); }
44 static Token objClose() { return Token(T_OBJ_CLOSE); }
45 static Token comma() { return Token(T_COMMA); }
46 static Token colon() { return Token(T_COLON); }
47 static Token eof() { return Token(T_EOF); }
48 static Token null() { return Token(T_NULL); }
49 string toString() const {
50 switch (t) {
51 case T_LIST_OPEN:
52 return "[";
53 case T_LIST_CLOSE:
54 return "]";
55 case T_OBJ_OPEN:
56 return "{";
57 case T_OBJ_CLOSE:
58 return "}";
59 case T_COMMA:
60 return ",";
61 case T_COLON:
62 return ":";
63 case T_STRING:
64 return "\"" + s + "\"";
65 case T_INT: {
66 std::stringstream ss;
67 ss << i;
68 return ss.str();
69 }
70 case T_FLOAT: {
71 std::stringstream ss;
72 ss << d;
73 return ss.str();
74 }
75 case T_BOOL:
76 return b ? "true" : "false";
77 case T_NULL:
78 return "null";
79 case T_EOF:
80 return "eof";
81 }
82 return "UNKNOWN";
83 }
84};
85
86Location JSONParser::errLocation() const {
87 Location loc(_filename, _line, _column, _line, _column);
88 return loc;
89}
90
91JSONParser::Token JSONParser::readToken(istream& is) {
92 string result;
93 char buf[1];
94 enum { S_NOTHING, S_STRING, S_STRING_ESCAPE, S_INT, S_FLOAT } state;
95 state = S_NOTHING;
96 while (is.good()) {
97 is.read(buf, sizeof(buf));
98 _column += sizeof(buf);
99 if (is.eof()) {
100 return Token::eof();
101 }
102 if (!is.good()) {
103 throw JSONError(_env, errLocation(), "tokenization failed");
104 }
105 switch (state) {
106 case S_NOTHING:
107 switch (buf[0]) {
108 case '\n':
109 _line++;
110 _column = 0;
111 // fall through
112 case ' ':
113 case '\t':
114 case '\r':
115 break;
116 case '[':
117 return Token::listOpen();
118 case ']':
119 return Token::listClose();
120 case '{':
121 return Token::objOpen();
122 case '}':
123 return Token::objClose();
124 case ',':
125 return Token::comma();
126 case ':':
127 return Token::colon();
128 case '"':
129 result = "";
130 state = S_STRING;
131 break;
132 case 't': {
133 char rest[3];
134 is.read(rest, sizeof(rest));
135 _column += sizeof(rest);
136 if (!is.good() || std::strncmp(rest, "rue", 3) != 0) {
137 throw JSONError(_env, errLocation(), "unexpected token `" + string(rest) + "'");
138 }
139 state = S_NOTHING;
140 return Token(true);
141 } break;
142 case 'f': {
143 char rest[4];
144 is.read(rest, sizeof(rest));
145 _column += sizeof(rest);
146 if (!is.good() || std::strncmp(rest, "alse", 4) != 0) {
147 throw JSONError(_env, errLocation(), "unexpected token `" + string(rest) + "'");
148 }
149 state = S_NOTHING;
150 return Token(false);
151 } break;
152 case 'n': {
153 char rest[3];
154 is.read(rest, sizeof(rest));
155 _column += sizeof(rest);
156 if (!is.good() || std::strncmp(rest, "ull", 3) != 0) {
157 throw JSONError(_env, errLocation(), "unexpected token `" + string(rest) + "'");
158 }
159 state = S_NOTHING;
160 return Token::null();
161 } break;
162 default:
163 if ((buf[0] >= '0' && buf[0] <= '9') || (buf[0] == '-')) {
164 result = buf[0];
165 state = S_INT;
166 } else {
167 throw JSONError(_env, errLocation(), "unexpected token `" + string(1, buf[0]) + "'");
168 }
169 break;
170 }
171 break;
172 case S_STRING_ESCAPE:
173 switch (buf[0]) {
174 case 'n':
175 result += "\n";
176 break;
177 case 't':
178 result += "\t";
179 break;
180 case '"':
181 result += "\"";
182 break;
183 case '\\':
184 result += "\\";
185 break;
186 default:
187 result += "\\";
188 result += buf[0];
189 break;
190 }
191 state = S_STRING;
192 break;
193 case S_STRING:
194 if (buf[0] == '"') {
195 state = S_NOTHING;
196 return Token(result);
197 }
198 if (buf[0] == '\\') {
199 state = S_STRING_ESCAPE;
200 } else {
201 result += buf[0];
202 }
203 break;
204 case S_INT:
205 if (buf[0] == '.') {
206 result += buf[0];
207 state = S_FLOAT;
208 } else if (buf[0] >= '0' && buf[0] <= '9') {
209 result += buf[0];
210 } else {
211 is.unget();
212 std::istringstream iss(result);
213 int v;
214 iss >> v;
215 state = S_NOTHING;
216 return Token(v);
217 }
218 break;
219 case S_FLOAT:
220 if (buf[0] >= '0' && buf[0] <= '9') {
221 result += buf[0];
222 } else {
223 is.unget();
224 std::istringstream iss(result);
225 double v;
226 iss >> v;
227 state = S_NOTHING;
228 return Token(v);
229 }
230 break;
231 }
232 }
233 if (result.empty()) {
234 // EOF
235 return Token();
236 }
237 throw JSONError(_env, errLocation(), "unexpected token `" + string(result) + "'");
238}
239
240void JSONParser::expectToken(istream& is, JSONParser::TokenT t) {
241 Token rt = readToken(is);
242 if (rt.t != t) {
243 throw JSONError(_env, errLocation(), "unexpected token");
244 }
245}
246
247string JSONParser::expectString(istream& is) {
248 Token rt = readToken(is);
249 if (rt.t != T_STRING) {
250 throw JSONError(_env, errLocation(), "unexpected token, expected string");
251 }
252 return rt.s;
253}
254
255void JSONParser::expectEof(istream& is) {
256 Token rt = readToken(is);
257 if (rt.t != T_EOF) {
258 throw JSONError(_env, errLocation(), "unexpected token, expected end of file");
259 }
260}
261
262JSONParser::Token JSONParser::parseEnumString(istream& is) {
263 Token next = readToken(is);
264 if (next.t != T_STRING) {
265 throw JSONError(_env, errLocation(), "invalid enum object");
266 }
267 if (next.s.empty()) {
268 throw JSONError(_env, errLocation(), "invalid enum identifier");
269 }
270 size_t nonIdChar =
271 next.s.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_");
272 size_t nonIdBegin = next.s.find_first_of("0123456789_");
273 if (nonIdChar != std::string::npos || nonIdBegin == 0) {
274 next.s = "'" + next.s + "'";
275 }
276 return next;
277}
278
279Expression* JSONParser::parseObject(istream& is, bool possibleString) {
280 // precondition: found T_OBJ_OPEN
281 Token objid = readToken(is);
282 if (objid.t != T_STRING) {
283 throw JSONError(_env, errLocation(), "invalid object");
284 }
285 expectToken(is, T_COLON);
286 if (objid.s == "set") {
287 expectToken(is, T_LIST_OPEN);
288 vector<Token> elems;
289 TokenT listT = T_COLON; // dummy marker
290 for (Token next = readToken(is); next.t != T_LIST_CLOSE; next = readToken(is)) {
291 switch (next.t) {
292 case T_COMMA:
293 break;
294 case T_INT:
295 if (listT == T_STRING || listT == T_OBJ_OPEN) {
296 throw JSONError(_env, errLocation(), "invalid set literal");
297 }
298 if (listT != T_FLOAT) {
299 listT = T_INT;
300 }
301 elems.push_back(next);
302 elems.push_back(next);
303 break;
304 case T_FLOAT:
305 if (listT == T_STRING || listT == T_OBJ_OPEN) {
306 throw JSONError(_env, errLocation(), "invalid set literal");
307 }
308 listT = T_FLOAT;
309 elems.push_back(next);
310 elems.push_back(next);
311 break;
312 case T_STRING:
313 if (listT != T_COLON && listT != (possibleString ? T_STRING : T_OBJ_OPEN)) {
314 throw JSONError(_env, errLocation(), "invalid set literal");
315 }
316 listT = possibleString ? T_STRING : T_OBJ_OPEN;
317 elems.push_back(next);
318 break;
319 case T_BOOL:
320 if (listT == T_STRING || listT == T_OBJ_OPEN) {
321 throw JSONError(_env, errLocation(), "invalid set literal");
322 }
323 if (listT == T_COLON) {
324 listT = T_BOOL;
325 }
326 elems.push_back(next);
327 break;
328 case T_OBJ_OPEN: {
329 if (listT != T_COLON && listT != T_OBJ_OPEN) {
330 throw JSONError(_env, errLocation(), "invalid set literal");
331 }
332 listT = T_OBJ_OPEN;
333 Token enumid = readToken(is);
334 if (enumid.t != T_STRING || enumid.s != "e") {
335 throw JSONError(_env, errLocation(), "invalid enum object");
336 }
337 expectToken(is, T_COLON);
338 Token next = parseEnumString(is);
339 expectToken(is, T_OBJ_CLOSE);
340 elems.push_back(next);
341 break;
342 }
343 case T_LIST_OPEN:
344 if (listT != T_COLON && listT != T_INT && listT != T_FLOAT) {
345 throw JSONError(_env, errLocation(), "invalid set literal");
346 }
347
348 next = readToken(is);
349 if (next.t == T_INT) {
350 if (listT != T_FLOAT) {
351 listT = T_INT;
352 }
353 } else if (next.t == T_FLOAT) {
354 listT = T_FLOAT;
355 } else {
356 throw JSONError(_env, errLocation(), "invalid set literal");
357 }
358 elems.push_back(next);
359
360 expectToken(is, T_COMMA);
361
362 next = readToken(is);
363 if (next.t == T_INT) {
364 if (listT != T_FLOAT) {
365 listT = T_INT;
366 }
367 } else if (next.t == T_FLOAT) {
368 listT = T_FLOAT;
369 } else {
370 throw JSONError(_env, errLocation(), "invalid set literal");
371 }
372 elems.push_back(next);
373
374 expectToken(is, T_LIST_CLOSE);
375 break;
376 default:
377 throw JSONError(_env, errLocation(), "invalid set literal");
378 }
379 }
380 expectToken(is, T_OBJ_CLOSE);
381
382 if (listT == T_INT) {
383 unsigned int n = elems.size() / 2;
384 auto* res = IntSetVal::a();
385 for (unsigned int i = 0; i < n; i++) {
386 IntVal m(elems[2 * i].i);
387 IntVal n(elems[2 * i + 1].i);
388 auto* isv = IntSetVal::a(m, n);
389 IntSetRanges isr(isv);
390 IntSetRanges r(res);
391 Ranges::Union<IntVal, IntSetRanges, IntSetRanges> u(isr, r);
392 res = IntSetVal::ai(u);
393 }
394 return new SetLit(Location().introduce(), res);
395 }
396 if (listT == T_FLOAT) {
397 unsigned int n = elems.size() / 2;
398 auto* res = FloatSetVal::a();
399 for (unsigned int i = 0; i < n; i++) {
400 FloatVal m(elems[2 * i].d);
401 FloatVal n(elems[2 * i + 1].d);
402 auto* fsv = FloatSetVal::a(m, n);
403 FloatSetRanges fsr(fsv);
404 FloatSetRanges r(res);
405 Ranges::Union<FloatVal, FloatSetRanges, FloatSetRanges> u(fsr, r);
406 res = FloatSetVal::ai(u);
407 }
408 return new SetLit(Location().introduce(), res);
409 }
410
411 vector<Expression*> elems_e(elems.size());
412 switch (listT) {
413 case T_COLON:
414 break;
415 case T_BOOL:
416 for (unsigned int i = 0; i < elems.size(); i++) {
417 elems_e[i] = new BoolLit(Location().introduce(), elems[i].b);
418 }
419 break;
420 case T_STRING:
421 for (unsigned int i = 0; i < elems.size(); i++) {
422 elems_e[i] = new StringLit(Location().introduce(), elems[i].s);
423 }
424 break;
425 case T_OBJ_OPEN:
426 for (unsigned int i = 0; i < elems.size(); i++) {
427 elems_e[i] = new Id(Location().introduce(), ASTString(elems[i].s), nullptr);
428 }
429 break;
430 default:
431 break;
432 }
433 return new SetLit(Location().introduce(), elems_e);
434 }
435 if (objid.s == "e") {
436 Token next = parseEnumString(is);
437 expectToken(is, T_OBJ_CLOSE);
438 return new Id(Location().introduce(), ASTString(next.s), nullptr);
439 }
440 throw JSONError(_env, errLocation(), "invalid object");
441}
442
443ArrayLit* JSONParser::parseArray(std::istream& is, bool possibleString) {
444 // precondition: opening parenthesis has been read
445 vector<Expression*> exps;
446 vector<pair<int, int> > dims;
447 dims.emplace_back(1, 0);
448 vector<bool> hadDim;
449 hadDim.push_back(false);
450 Token next;
451 for (;;) {
452 next = readToken(is);
453 if (next.t != T_LIST_OPEN) {
454 break;
455 }
456 dims.emplace_back(1, 0);
457 hadDim.push_back(false);
458 }
459 int curDim = static_cast<int>(dims.size()) - 1;
460 for (;;) {
461 switch (next.t) {
462 case T_LIST_CLOSE:
463 hadDim[curDim] = true;
464 curDim--;
465 if (curDim < 0) {
466 goto list_done;
467 } else if (!hadDim[curDim]) {
468 dims[curDim].second++;
469 }
470 break;
471 case T_LIST_OPEN:
472 curDim++;
473 break;
474 case T_COMMA:
475 break;
476 case T_INT:
477 if (!hadDim[curDim]) {
478 dims[curDim].second++;
479 }
480 exps.push_back(IntLit::a(next.i));
481 break;
482 case T_FLOAT:
483 if (!hadDim[curDim]) {
484 dims[curDim].second++;
485 }
486 exps.push_back(FloatLit::a(next.d));
487 break;
488 case T_STRING: {
489 if (!hadDim[curDim]) {
490 dims[curDim].second++;
491 }
492 if (possibleString) {
493 exps.push_back(new StringLit(Location().introduce(), next.s));
494 } else {
495 exps.push_back(new Id(Location().introduce(), ASTString(next.s), nullptr));
496 }
497 break;
498 }
499 case T_BOOL:
500 if (!hadDim[curDim]) {
501 dims[curDim].second++;
502 }
503 exps.push_back(new BoolLit(Location().introduce(), next.b));
504 break;
505 case T_NULL:
506 if (!hadDim[curDim]) {
507 dims[curDim].second++;
508 }
509 exps.push_back(constants().absent);
510 break;
511 case T_OBJ_OPEN:
512 if (!hadDim[curDim]) {
513 dims[curDim].second++;
514 }
515 exps.push_back(parseObject(is));
516 break;
517 default:
518 throw JSONError(_env, errLocation(), "cannot parse JSON file");
519 break;
520 }
521 next = readToken(is);
522 }
523list_done:
524 unsigned int expectedSize = 1;
525 for (auto& d : dims) {
526 expectedSize *= d.second;
527 }
528 if (exps.size() != expectedSize) {
529 throw JSONError(_env, errLocation(), "mismatch in array dimensions");
530 /// TODO: check each individual sub-array
531 }
532 return new ArrayLit(Location().introduce(), exps, dims);
533}
534
535Expression* JSONParser::parseExp(std::istream& is, bool parseObjects, bool possibleString) {
536 Token next = readToken(is);
537 switch (next.t) {
538 case T_INT:
539 return IntLit::a(next.i);
540 break;
541 case T_FLOAT:
542 return FloatLit::a(next.d);
543 case T_STRING:
544 if (!possibleString) {
545 return new Id(Location().introduce(), ASTString(next.s), nullptr);
546 }
547 return new StringLit(Location().introduce(), next.s);
548 case T_BOOL:
549 return new BoolLit(Location().introduce(), next.b);
550 case T_NULL:
551 return constants().absent;
552 case T_OBJ_OPEN:
553 return parseObjects ? parseObject(is, possibleString) : nullptr;
554 case T_LIST_OPEN:
555 return parseArray(is, possibleString);
556 default:
557 throw JSONError(_env, errLocation(), "cannot parse JSON file");
558 break;
559 }
560}
561
562Expression* JSONParser::coerceArray(TypeInst* intendedTI, ArrayLit* al) {
563 assert(al != nullptr);
564 TypeInst& ti = *intendedTI;
565 const Location& loc = al->loc();
566
567 if (al->size() == 0) {
568 return al; // Nothing to coerce
569 }
570 if (al->dims() != 1 && al->dims() != ti.ranges().size()) {
571 return al; // Incompatible: TypeError will be thrown on original array
572 }
573
574 int missing_index = -1;
575 for (int i = 0; i < ti.ranges().size(); ++i) {
576 TypeInst* nti = ti.ranges()[i];
577 if (nti->domain() == nullptr) {
578 if (missing_index != -1) {
579 return al; // More than one index set is missing. Cannot compute correct index sets.
580 }
581 missing_index = i;
582 }
583 }
584
585 std::vector<Expression*> args(ti.ranges().size() + 1);
586 Expression* missing_max = missing_index >= 0 ? IntLit::a(al->size()) : nullptr;
587 for (int i = 0; i < ti.ranges().size(); ++i) {
588 if (i != missing_index) {
589 assert(ti.ranges()[i]->domain() != nullptr);
590 args[i] = ti.ranges()[i]->domain();
591 if (missing_index >= 0) {
592 missing_max = new BinOp(loc.introduce(), missing_max, BOT_IDIV,
593 new Call(Location().introduce(), "card", {args[i]}));
594 }
595 }
596 }
597 if (missing_index >= 0) {
598 args[missing_index] = new BinOp(loc.introduce(), IntLit::a(1), BOT_DOTDOT, missing_max);
599 }
600 args[args.size() - 1] = al;
601
602 std::string name = "array" + std::to_string(ti.ranges().size()) + "d";
603 Call* c = new Call(al->loc().introduce(), name, args);
604 if (al->dims() != 1) {
605 c->addAnnotation(constants().ann.array_check_form);
606 }
607 return c;
608}
609
610void JSONParser::parseModel(Model* m, std::istream& is, bool isData) {
611 // precondition: found T_OBJ_OPEN
612 ASTStringMap<TypeInst*> knownIds;
613 if (isData) {
614 // Collect known VarDecl ids from model and includes
615 class VarDeclVisitor : public ItemVisitor {
616 private:
617 ASTStringMap<TypeInst*>& _knownIds;
618
619 public:
620 VarDeclVisitor(ASTStringMap<TypeInst*>& knownIds) : _knownIds(knownIds) {}
621 void vVarDeclI(VarDeclI* vdi) {
622 VarDecl* vd = vdi->e();
623 _knownIds.emplace(vd->id()->str(), vd->ti());
624 }
625 } _varDecls(knownIds);
626 iter_items(_varDecls, m);
627 }
628 for (;;) {
629 string ident = expectString(is);
630 expectToken(is, T_COLON);
631 auto it = knownIds.find(ident);
632 bool possibleString = it == knownIds.end() || it->second->type().bt() != Type::BT_UNKNOWN;
633 Expression* e = parseExp(is, isData, possibleString);
634 if (ident[0] != '_' && (!isData || it != knownIds.end())) {
635 if (e == nullptr) {
636 // This is a nested object
637 auto* subModel = new Model;
638 parseModel(subModel, is, isData);
639 auto* ii = new IncludeI(Location().introduce(), ident);
640 ii->m(subModel, true);
641 m->addItem(ii);
642 } else {
643 auto* al = e->dynamicCast<ArrayLit>();
644 if (it != knownIds.end() && al != nullptr) {
645 if (it->second->isarray()) {
646 // Add correct index sets if they are non-standard
647 e = coerceArray(it->second, al);
648 } else if (it->second->type().isSet()) {
649 // Convert array to a set
650 e = new Call(Location().introduce(), "array2set", {al});
651 }
652 }
653 auto* ai = new AssignI(e->loc().introduce(), ident, e);
654 m->addItem(ai);
655 }
656 }
657 Token next = readToken(is);
658 if (next.t == T_OBJ_CLOSE) {
659 break;
660 }
661 if (next.t != T_COMMA) {
662 throw JSONError(_env, errLocation(), "cannot parse JSON file");
663 }
664 }
665}
666
667void JSONParser::parse(Model* m, const std::string& filename0, bool isData) {
668 _filename = filename0;
669 ifstream is(FILE_PATH(_filename), ios::in);
670 if (!is.good()) {
671 throw JSONError(_env, Location().introduce(), "cannot open file " + _filename);
672 }
673 _line = 0;
674 _column = 0;
675 expectToken(is, T_OBJ_OPEN);
676 parseModel(m, is, isData);
677 expectEof(is);
678}
679
680void JSONParser::parseFromString(Model* m, const std::string& data, bool isData) {
681 istringstream iss(data);
682 _line = 0;
683 _column = 0;
684 expectToken(iss, T_OBJ_OPEN);
685 parseModel(m, iss, isData);
686 expectEof(iss);
687}
688
namespace {
/// Report whether the first character of \a is that is not a space, tab,
/// newline or carriage return is an opening brace. Returns false if the
/// stream is not readable or ends before such a character is found.
bool is_json(std::istream& is) {
  for (;;) {
    if (!is.good()) {
      return false;
    }
    const char c = is.get();
    switch (c) {
      case ' ':
      case '\n':
      case '\t':
      case '\r':
        continue;  // skip leading white space
      default:
        return c == '{';
    }
  }
}
}  // namespace
703
704bool JSONParser::stringIsJSON(const std::string& data) {
705 std::istringstream iss(data);
706 return is_json(iss);
707}
708
709bool JSONParser::fileIsJSON(const std::string& filename) {
710 ifstream is(FILE_PATH(filename), ios::in);
711 return is_json(is);
712}
713
714} // namespace MiniZinc