this repo has no description
at develop 21 kB view raw
1/* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */ 2 3/* 4 * Main authors: 5 * Guido Tack <guido.tack@monash.edu> 6 */ 7 8/* This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 11 12#include <minizinc/file_utils.hh> 13#include <minizinc/iter.hh> 14#include <minizinc/json_parser.hh> 15 16#include <fstream> 17#include <sstream> 18#include <utility> 19 20using namespace std; 21 22namespace MiniZinc { 23 24class JSONParser::Token { 25public: 26 TokenT t; 27 28protected: 29 Token(TokenT t0) : t(t0) {} 30 31public: 32 Token() : t(T_EOF) {} 33 std::string s; 34 int i; 35 double d; 36 bool b; 37 Token(std::string s0) : t(T_STRING), s(std::move(s0)) {} 38 Token(int i0) : t(T_INT), i(i0), d(i0) {} 39 Token(double d0) : t(T_FLOAT), d(d0) {} 40 Token(bool b0) : t(T_BOOL), i(static_cast<int>(b0)), d(static_cast<double>(b0)), b(b0) {} 41 static Token listOpen() { return Token(T_LIST_OPEN); } 42 static Token listClose() { return Token(T_LIST_CLOSE); } 43 static Token objOpen() { return Token(T_OBJ_OPEN); } 44 static Token objClose() { return Token(T_OBJ_CLOSE); } 45 static Token comma() { return Token(T_COMMA); } 46 static Token colon() { return Token(T_COLON); } 47 static Token eof() { return Token(T_EOF); } 48 static Token null() { return Token(T_NULL); } 49 string toString() const { 50 switch (t) { 51 case T_LIST_OPEN: 52 return "["; 53 case T_LIST_CLOSE: 54 return "]"; 55 case T_OBJ_OPEN: 56 return "{"; 57 case T_OBJ_CLOSE: 58 return "}"; 59 case T_COMMA: 60 return ","; 61 case T_COLON: 62 return ":"; 63 case T_STRING: 64 return "\"" + s + "\""; 65 case T_INT: { 66 std::stringstream ss; 67 ss << i; 68 return ss.str(); 69 } 70 case T_FLOAT: { 71 std::stringstream ss; 72 ss << d; 73 return ss.str(); 74 } 75 case T_BOOL: 76 return b ? "true" : "false"; 77 case T_NULL: 78 return "null"; 79 case T_EOF: 80 return "eof"; 81 } 82 return "UNKNOWN"; 83 } 84}; 85 86Location JSONParser::errLocation() const { 87 Location loc(_filename, _line, _column, _line, _column); 88 return loc; 89} 90 91JSONParser::Token JSONParser::readToken(istream& is) { 92 string result; 93 char buf[1]; 94 enum { S_NOTHING, S_STRING, S_STRING_ESCAPE, S_INT, S_FLOAT } state; 95 state = S_NOTHING; 96 while (is.good()) { 97 is.read(buf, sizeof(buf)); 98 _column += sizeof(buf); 99 if (is.eof()) { 100 return Token::eof(); 101 } 102 if (!is.good()) { 103 throw JSONError(_env, errLocation(), "tokenization failed"); 104 } 105 switch (state) { 106 case S_NOTHING: 107 switch (buf[0]) { 108 case '\n': 109 _line++; 110 _column = 0; 111 // fall through 112 case ' ': 113 case '\t': 114 case '\r': 115 break; 116 case '[': 117 return Token::listOpen(); 118 case ']': 119 return Token::listClose(); 120 case '{': 121 return Token::objOpen(); 122 case '}': 123 return Token::objClose(); 124 case ',': 125 return Token::comma(); 126 case ':': 127 return Token::colon(); 128 case '"': 129 result = ""; 130 state = S_STRING; 131 break; 132 case 't': { 133 char rest[3]; 134 is.read(rest, sizeof(rest)); 135 _column += sizeof(rest); 136 if (!is.good() || std::strncmp(rest, "rue", 3) != 0) { 137 throw JSONError(_env, errLocation(), "unexpected token `" + string(rest) + "'"); 138 } 139 state = S_NOTHING; 140 return Token(true); 141 } break; 142 case 'f': { 143 char rest[4]; 144 is.read(rest, sizeof(rest)); 145 _column += sizeof(rest); 146 if (!is.good() || std::strncmp(rest, "alse", 4) != 0) { 147 throw JSONError(_env, errLocation(), "unexpected token `" + string(rest) + "'"); 148 } 149 state = S_NOTHING; 150 return Token(false); 151 } break; 152 case 'n': { 153 char rest[3]; 154 is.read(rest, sizeof(rest)); 155 _column += sizeof(rest); 156 if (!is.good() || std::strncmp(rest, "ull", 3) != 0) { 157 throw JSONError(_env, errLocation(), "unexpected token `" + string(rest) + "'"); 158 } 159 state = S_NOTHING; 160 return Token::null(); 161 } break; 162 default: 163 if ((buf[0] >= '0' && buf[0] <= '9') || (buf[0] == '-')) { 164 result = buf[0]; 165 state = S_INT; 166 } else { 167 throw JSONError(_env, errLocation(), "unexpected token `" + string(1, buf[0]) + "'"); 168 } 169 break; 170 } 171 break; 172 case S_STRING_ESCAPE: 173 switch (buf[0]) { 174 case 'n': 175 result += "\n"; 176 break; 177 case 't': 178 result += "\t"; 179 break; 180 case '"': 181 result += "\""; 182 break; 183 case '\\': 184 result += "\\"; 185 break; 186 default: 187 result += "\\"; 188 result += buf[0]; 189 break; 190 } 191 state = S_STRING; 192 break; 193 case S_STRING: 194 if (buf[0] == '"') { 195 state = S_NOTHING; 196 return Token(result); 197 } 198 if (buf[0] == '\\') { 199 state = S_STRING_ESCAPE; 200 } else { 201 result += buf[0]; 202 } 203 break; 204 case S_INT: 205 if (buf[0] == '.') { 206 result += buf[0]; 207 state = S_FLOAT; 208 } else if (buf[0] >= '0' && buf[0] <= '9') { 209 result += buf[0]; 210 } else { 211 is.unget(); 212 std::istringstream iss(result); 213 int v; 214 iss >> v; 215 state = S_NOTHING; 216 return Token(v); 217 } 218 break; 219 case S_FLOAT: 220 if (buf[0] >= '0' && buf[0] <= '9') { 221 result += buf[0]; 222 } else { 223 is.unget(); 224 std::istringstream iss(result); 225 double v; 226 iss >> v; 227 state = S_NOTHING; 228 return Token(v); 229 } 230 break; 231 } 232 } 233 if (result.empty()) { 234 // EOF 235 return Token(); 236 } 237 throw JSONError(_env, errLocation(), "unexpected token `" + string(result) + "'"); 238} 239 240void JSONParser::expectToken(istream& is, JSONParser::TokenT t) { 241 Token rt = readToken(is); 242 if (rt.t != t) { 243 throw JSONError(_env, errLocation(), "unexpected token"); 244 } 245} 246 247string JSONParser::expectString(istream& is) { 248 Token rt = readToken(is); 249 if (rt.t != T_STRING) { 250 throw JSONError(_env, errLocation(), "unexpected token, expected string"); 251 } 252 return rt.s; 253} 254 255void JSONParser::expectEof(istream& is) { 256 Token rt = readToken(is); 257 if (rt.t != T_EOF) { 258 throw JSONError(_env, errLocation(), "unexpected token, expected end of file"); 259 } 260} 261 262JSONParser::Token JSONParser::parseEnumString(istream& is) { 263 Token next = readToken(is); 264 if (next.t != T_STRING) { 265 throw JSONError(_env, errLocation(), "invalid enum object"); 266 } 267 if (next.s.empty()) { 268 throw JSONError(_env, errLocation(), "invalid enum identifier"); 269 } 270 size_t nonIdChar = 271 next.s.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"); 272 size_t nonIdBegin = next.s.find_first_of("0123456789_"); 273 if (nonIdChar != std::string::npos || nonIdBegin == 0) { 274 next.s = "'" + next.s + "'"; 275 } 276 return next; 277} 278 279Expression* JSONParser::parseObject(istream& is, bool possibleString) { 280 // precondition: found T_OBJ_OPEN 281 Token objid = readToken(is); 282 if (objid.t != T_STRING) { 283 throw JSONError(_env, errLocation(), "invalid object"); 284 } 285 expectToken(is, T_COLON); 286 if (objid.s == "set") { 287 expectToken(is, T_LIST_OPEN); 288 vector<Token> elems; 289 TokenT listT = T_COLON; // dummy marker 290 for (Token next = readToken(is); next.t != T_LIST_CLOSE; next = readToken(is)) { 291 switch (next.t) { 292 case T_COMMA: 293 break; 294 case T_INT: 295 if (listT == T_STRING || listT == T_OBJ_OPEN) { 296 throw JSONError(_env, errLocation(), "invalid set literal"); 297 } 298 if (listT != T_FLOAT) { 299 listT = T_INT; 300 } 301 elems.push_back(next); 302 elems.push_back(next); 303 break; 304 case T_FLOAT: 305 if (listT == T_STRING || listT == T_OBJ_OPEN) { 306 throw JSONError(_env, errLocation(), "invalid set literal"); 307 } 308 listT = T_FLOAT; 309 elems.push_back(next); 310 elems.push_back(next); 311 break; 312 case T_STRING: 313 if (listT != T_COLON && listT != (possibleString ? T_STRING : T_OBJ_OPEN)) { 314 throw JSONError(_env, errLocation(), "invalid set literal"); 315 } 316 listT = possibleString ? T_STRING : T_OBJ_OPEN; 317 elems.push_back(next); 318 break; 319 case T_BOOL: 320 if (listT == T_STRING || listT == T_OBJ_OPEN) { 321 throw JSONError(_env, errLocation(), "invalid set literal"); 322 } 323 if (listT == T_COLON) { 324 listT = T_BOOL; 325 } 326 elems.push_back(next); 327 break; 328 case T_OBJ_OPEN: { 329 if (listT != T_COLON && listT != T_OBJ_OPEN) { 330 throw JSONError(_env, errLocation(), "invalid set literal"); 331 } 332 listT = T_OBJ_OPEN; 333 Token enumid = readToken(is); 334 if (enumid.t != T_STRING || enumid.s != "e") { 335 throw JSONError(_env, errLocation(), "invalid enum object"); 336 } 337 expectToken(is, T_COLON); 338 Token next = parseEnumString(is); 339 expectToken(is, T_OBJ_CLOSE); 340 elems.push_back(next); 341 break; 342 } 343 case T_LIST_OPEN: 344 if (listT != T_COLON && listT != T_INT && listT != T_FLOAT) { 345 throw JSONError(_env, errLocation(), "invalid set literal"); 346 } 347 348 next = readToken(is); 349 if (next.t == T_INT) { 350 if (listT != T_FLOAT) { 351 listT = T_INT; 352 } 353 } else if (next.t == T_FLOAT) { 354 listT = T_FLOAT; 355 } else { 356 throw JSONError(_env, errLocation(), "invalid set literal"); 357 } 358 elems.push_back(next); 359 360 expectToken(is, T_COMMA); 361 362 next = readToken(is); 363 if (next.t == T_INT) { 364 if (listT != T_FLOAT) { 365 listT = T_INT; 366 } 367 } else if (next.t == T_FLOAT) { 368 listT = T_FLOAT; 369 } else { 370 throw JSONError(_env, errLocation(), "invalid set literal"); 371 } 372 elems.push_back(next); 373 374 expectToken(is, T_LIST_CLOSE); 375 break; 376 default: 377 throw JSONError(_env, errLocation(), "invalid set literal"); 378 } 379 } 380 expectToken(is, T_OBJ_CLOSE); 381 382 if (listT == T_INT) { 383 unsigned int n = elems.size() / 2; 384 auto* res = IntSetVal::a(); 385 for (unsigned int i = 0; i < n; i++) { 386 IntVal m(elems[2 * i].i); 387 IntVal n(elems[2 * i + 1].i); 388 auto* isv = IntSetVal::a(m, n); 389 IntSetRanges isr(isv); 390 IntSetRanges r(res); 391 Ranges::Union<IntVal, IntSetRanges, IntSetRanges> u(isr, r); 392 res = IntSetVal::ai(u); 393 } 394 return new SetLit(Location().introduce(), res); 395 } 396 if (listT == T_FLOAT) { 397 unsigned int n = elems.size() / 2; 398 auto* res = FloatSetVal::a(); 399 for (unsigned int i = 0; i < n; i++) { 400 FloatVal m(elems[2 * i].d); 401 FloatVal n(elems[2 * i + 1].d); 402 auto* fsv = FloatSetVal::a(m, n); 403 FloatSetRanges fsr(fsv); 404 FloatSetRanges r(res); 405 Ranges::Union<FloatVal, FloatSetRanges, FloatSetRanges> u(fsr, r); 406 res = FloatSetVal::ai(u); 407 } 408 return new SetLit(Location().introduce(), res); 409 } 410 411 vector<Expression*> elems_e(elems.size()); 412 switch (listT) { 413 case T_COLON: 414 break; 415 case T_BOOL: 416 for (unsigned int i = 0; i < elems.size(); i++) { 417 elems_e[i] = new BoolLit(Location().introduce(), elems[i].b); 418 } 419 break; 420 case T_STRING: 421 for (unsigned int i = 0; i < elems.size(); i++) { 422 elems_e[i] = new StringLit(Location().introduce(), elems[i].s); 423 } 424 break; 425 case T_OBJ_OPEN: 426 for (unsigned int i = 0; i < elems.size(); i++) { 427 elems_e[i] = new Id(Location().introduce(), ASTString(elems[i].s), nullptr); 428 } 429 break; 430 default: 431 break; 432 } 433 return new SetLit(Location().introduce(), elems_e); 434 } 435 if (objid.s == "e") { 436 Token next = parseEnumString(is); 437 expectToken(is, T_OBJ_CLOSE); 438 return new Id(Location().introduce(), ASTString(next.s), nullptr); 439 } 440 throw JSONError(_env, errLocation(), "invalid object"); 441} 442 443ArrayLit* JSONParser::parseArray(std::istream& is, bool possibleString) { 444 // precondition: opening parenthesis has been read 445 vector<Expression*> exps; 446 vector<pair<int, int> > dims; 447 dims.emplace_back(1, 0); 448 vector<bool> hadDim; 449 hadDim.push_back(false); 450 Token next; 451 for (;;) { 452 next = readToken(is); 453 if (next.t != T_LIST_OPEN) { 454 break; 455 } 456 dims.emplace_back(1, 0); 457 hadDim.push_back(false); 458 } 459 int curDim = static_cast<int>(dims.size()) - 1; 460 for (;;) { 461 switch (next.t) { 462 case T_LIST_CLOSE: 463 hadDim[curDim] = true; 464 curDim--; 465 if (curDim < 0) { 466 goto list_done; 467 } else if (!hadDim[curDim]) { 468 dims[curDim].second++; 469 } 470 break; 471 case T_LIST_OPEN: 472 curDim++; 473 break; 474 case T_COMMA: 475 break; 476 case T_INT: 477 if (!hadDim[curDim]) { 478 dims[curDim].second++; 479 } 480 exps.push_back(IntLit::a(next.i)); 481 break; 482 case T_FLOAT: 483 if (!hadDim[curDim]) { 484 dims[curDim].second++; 485 } 486 exps.push_back(FloatLit::a(next.d)); 487 break; 488 case T_STRING: { 489 if (!hadDim[curDim]) { 490 dims[curDim].second++; 491 } 492 if (possibleString) { 493 exps.push_back(new StringLit(Location().introduce(), next.s)); 494 } else { 495 exps.push_back(new Id(Location().introduce(), ASTString(next.s), nullptr)); 496 } 497 break; 498 } 499 case T_BOOL: 500 if (!hadDim[curDim]) { 501 dims[curDim].second++; 502 } 503 exps.push_back(new BoolLit(Location().introduce(), next.b)); 504 break; 505 case T_NULL: 506 if (!hadDim[curDim]) { 507 dims[curDim].second++; 508 } 509 exps.push_back(constants().absent); 510 break; 511 case T_OBJ_OPEN: 512 if (!hadDim[curDim]) { 513 dims[curDim].second++; 514 } 515 exps.push_back(parseObject(is)); 516 break; 517 default: 518 throw JSONError(_env, errLocation(), "cannot parse JSON file"); 519 break; 520 } 521 next = readToken(is); 522 } 523list_done: 524 unsigned int expectedSize = 1; 525 for (auto& d : dims) { 526 expectedSize *= d.second; 527 } 528 if (exps.size() != expectedSize) { 529 throw JSONError(_env, errLocation(), "mismatch in array dimensions"); 530 /// TODO: check each individual sub-array 531 } 532 return new ArrayLit(Location().introduce(), exps, dims); 533} 534 535Expression* JSONParser::parseExp(std::istream& is, bool parseObjects, bool possibleString) { 536 Token next = readToken(is); 537 switch (next.t) { 538 case T_INT: 539 return IntLit::a(next.i); 540 break; 541 case T_FLOAT: 542 return FloatLit::a(next.d); 543 case T_STRING: 544 if (!possibleString) { 545 return new Id(Location().introduce(), ASTString(next.s), nullptr); 546 } 547 return new StringLit(Location().introduce(), next.s); 548 case T_BOOL: 549 return new BoolLit(Location().introduce(), next.b); 550 case T_NULL: 551 return constants().absent; 552 case T_OBJ_OPEN: 553 return parseObjects ? parseObject(is, possibleString) : nullptr; 554 case T_LIST_OPEN: 555 return parseArray(is, possibleString); 556 default: 557 throw JSONError(_env, errLocation(), "cannot parse JSON file"); 558 break; 559 } 560} 561 562Expression* JSONParser::coerceArray(TypeInst* intendedTI, ArrayLit* al) { 563 assert(al != nullptr); 564 TypeInst& ti = *intendedTI; 565 const Location& loc = al->loc(); 566 567 if (al->size() == 0) { 568 return al; // Nothing to coerce 569 } 570 if (al->dims() != 1 && al->dims() != ti.ranges().size()) { 571 return al; // Incompatible: TypeError will be thrown on original array 572 } 573 574 int missing_index = -1; 575 for (int i = 0; i < ti.ranges().size(); ++i) { 576 TypeInst* nti = ti.ranges()[i]; 577 if (nti->domain() == nullptr) { 578 if (missing_index != -1) { 579 return al; // More than one index set is missing. Cannot compute correct index sets. 580 } 581 missing_index = i; 582 } 583 } 584 585 std::vector<Expression*> args(ti.ranges().size() + 1); 586 Expression* missing_max = missing_index >= 0 ? IntLit::a(al->size()) : nullptr; 587 for (int i = 0; i < ti.ranges().size(); ++i) { 588 if (i != missing_index) { 589 assert(ti.ranges()[i]->domain() != nullptr); 590 args[i] = ti.ranges()[i]->domain(); 591 if (missing_index >= 0) { 592 missing_max = new BinOp(loc.introduce(), missing_max, BOT_IDIV, 593 new Call(Location().introduce(), "card", {args[i]})); 594 } 595 } 596 } 597 if (missing_index >= 0) { 598 args[missing_index] = new BinOp(loc.introduce(), IntLit::a(1), BOT_DOTDOT, missing_max); 599 } 600 args[args.size() - 1] = al; 601 602 std::string name = "array" + std::to_string(ti.ranges().size()) + "d"; 603 Call* c = new Call(al->loc().introduce(), name, args); 604 if (al->dims() != 1) { 605 c->addAnnotation(constants().ann.array_check_form); 606 } 607 return c; 608} 609 610void JSONParser::parseModel(Model* m, std::istream& is, bool isData) { 611 // precondition: found T_OBJ_OPEN 612 ASTStringMap<TypeInst*> knownIds; 613 if (isData) { 614 // Collect known VarDecl ids from model and includes 615 class VarDeclVisitor : public ItemVisitor { 616 private: 617 ASTStringMap<TypeInst*>& _knownIds; 618 619 public: 620 VarDeclVisitor(ASTStringMap<TypeInst*>& knownIds) : _knownIds(knownIds) {} 621 void vVarDeclI(VarDeclI* vdi) { 622 VarDecl* vd = vdi->e(); 623 _knownIds.emplace(vd->id()->str(), vd->ti()); 624 } 625 } _varDecls(knownIds); 626 iter_items(_varDecls, m); 627 } 628 for (;;) { 629 string ident = expectString(is); 630 expectToken(is, T_COLON); 631 auto it = knownIds.find(ident); 632 bool possibleString = it == knownIds.end() || it->second->type().bt() != Type::BT_UNKNOWN; 633 Expression* e = parseExp(is, isData, possibleString); 634 if (ident[0] != '_' && (!isData || it != knownIds.end())) { 635 if (e == nullptr) { 636 // This is a nested object 637 auto* subModel = new Model; 638 parseModel(subModel, is, isData); 639 auto* ii = new IncludeI(Location().introduce(), ident); 640 ii->m(subModel, true); 641 m->addItem(ii); 642 } else { 643 auto* al = e->dynamicCast<ArrayLit>(); 644 if (it != knownIds.end() && al != nullptr) { 645 if (it->second->isarray()) { 646 // Add correct index sets if they are non-standard 647 e = coerceArray(it->second, al); 648 } else if (it->second->type().isSet()) { 649 // Convert array to a set 650 e = new Call(Location().introduce(), "array2set", {al}); 651 } 652 } 653 auto* ai = new AssignI(e->loc().introduce(), ident, e); 654 m->addItem(ai); 655 } 656 } 657 Token next = readToken(is); 658 if (next.t == T_OBJ_CLOSE) { 659 break; 660 } 661 if (next.t != T_COMMA) { 662 throw JSONError(_env, errLocation(), "cannot parse JSON file"); 663 } 664 } 665} 666 667void JSONParser::parse(Model* m, const std::string& filename0, bool isData) { 668 _filename = filename0; 669 ifstream is(FILE_PATH(_filename), ios::in); 670 if (!is.good()) { 671 throw JSONError(_env, Location().introduce(), "cannot open file " + _filename); 672 } 673 _line = 0; 674 _column = 0; 675 expectToken(is, T_OBJ_OPEN); 676 parseModel(m, is, isData); 677 expectEof(is); 678} 679 680void JSONParser::parseFromString(Model* m, const std::string& data, bool isData) { 681 istringstream iss(data); 682 _line = 0; 683 _column = 0; 684 expectToken(iss, T_OBJ_OPEN); 685 parseModel(m, iss, isData); 686 expectEof(iss); 687} 688 689namespace { 690bool is_json(std::istream& is) { 691 while (is.good()) { 692 char c = is.get(); 693 if (c == '{') { 694 return true; 695 } 696 if (c != ' ' && c != '\n' && c != '\t' && c != '\r') { 697 return false; 698 } 699 } 700 return false; 701} 702} // namespace 703 704bool JSONParser::stringIsJSON(const std::string& data) { 705 std::istringstream iss(data); 706 return is_json(iss); 707} 708 709bool JSONParser::fileIsJSON(const std::string& filename) { 710 ifstream is(FILE_PATH(filename), ios::in); 711 return is_json(is); 712} 713 714} // namespace MiniZinc