this repo has no description
at develop 15 kB view raw
1/* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */ 2 3/* 4 * Main authors: 5 * Guido Tack <guido.tack@monash.edu> 6 */ 7 8/* This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 11 12#include <minizinc/json_parser.hh> 13 14#include <fstream> 15#include <sstream> 16 17using namespace std; 18 19namespace MiniZinc { 20 21class JSONParser::Token { 22public: 23 TokenT t; 24 25protected: 26 Token(TokenT t0) : t(t0) {} 27 28public: 29 Token(void) : t(T_EOF) {} 30 std::string s; 31 int i; 32 double d; 33 bool b; 34 Token(std::string s0) : t(T_STRING), s(s0) {} 35 Token(int i0) : t(T_INT), i(i0), d(i0) {} 36 Token(double d0) : t(T_FLOAT), d(d0) {} 37 Token(bool b0) : t(T_BOOL), i(b0), d(b0), b(b0) {} 38 static Token listOpen() { return Token(T_LIST_OPEN); } 39 static Token listClose() { return Token(T_LIST_CLOSE); } 40 static Token objOpen() { return Token(T_OBJ_OPEN); } 41 static Token objClose() { return Token(T_OBJ_CLOSE); } 42 static Token comma() { return Token(T_COMMA); } 43 static Token colon() { return Token(T_COLON); } 44 static Token eof() { return Token(T_EOF); } 45 static Token null() { return Token(T_NULL); } 46 string toString(void) { 47 switch (t) { 48 case T_LIST_OPEN: 49 return "["; 50 case T_LIST_CLOSE: 51 return "]"; 52 case T_OBJ_OPEN: 53 return "{"; 54 case T_OBJ_CLOSE: 55 return "}"; 56 case T_COMMA: 57 return ","; 58 case T_COLON: 59 return ":"; 60 case T_STRING: 61 return "\"" + s + "\""; 62 case T_INT: { 63 std::stringstream ss; 64 ss << i; 65 return ss.str(); 66 } 67 case T_FLOAT: { 68 std::stringstream ss; 69 ss << d; 70 return ss.str(); 71 } 72 case T_BOOL: 73 return b ? "true" : "false"; 74 case T_NULL: 75 return "null"; 76 case T_EOF: 77 return "eof"; 78 } 79 } 80}; 81 82Location JSONParser::errLocation(void) const { 83 Location loc(filename, line, column, line, column); 84 return loc; 85} 86 87JSONParser::Token JSONParser::readToken(istream& is) { 88 string result; 89 char buf[1]; 90 enum { S_NOTHING, S_STRING, S_STRING_ESCAPE, S_INT, S_FLOAT } state; 91 state = S_NOTHING; 92 while (is.good()) { 93 is.read(buf, sizeof(buf)); 94 column += sizeof(buf); 95 if (is.eof()) return Token::eof(); 96 if (!is.good()) throw JSONError(env, errLocation(), "tokenization failed"); 97 switch (state) { 98 case S_NOTHING: 99 switch (buf[0]) { 100 case '\n': 101 line++; 102 column = 0; 103 // fall through 104 case ' ': 105 case '\t': 106 case '\r': 107 break; 108 case '[': 109 return Token::listOpen(); 110 case ']': 111 return Token::listClose(); 112 case '{': 113 return Token::objOpen(); 114 case '}': 115 return Token::objClose(); 116 case ',': 117 return Token::comma(); 118 case ':': 119 return Token::colon(); 120 case '"': 121 result = ""; 122 state = S_STRING; 123 break; 124 case 't': { 125 char rest[3]; 126 is.read(rest, sizeof(rest)); 127 column += sizeof(rest); 128 if (!is.good() || std::strncmp(rest, "rue", 3) != 0) 129 throw JSONError(env, errLocation(), "unexpected token `" + string(rest) + "'"); 130 state = S_NOTHING; 131 return Token(true); 132 } break; 133 case 'f': { 134 char rest[4]; 135 is.read(rest, sizeof(rest)); 136 column += sizeof(rest); 137 if (!is.good() || std::strncmp(rest, "alse", 4) != 0) 138 throw JSONError(env, errLocation(), "unexpected token `" + string(rest) + "'"); 139 state = S_NOTHING; 140 return Token(false); 141 } break; 142 case 'n': { 143 char rest[3]; 144 is.read(rest, sizeof(rest)); 145 column += sizeof(rest); 146 if (!is.good() || std::strncmp(rest, "ull", 3) != 0) 147 throw JSONError(env, errLocation(), "unexpected token `" + string(rest) + "'"); 148 state = S_NOTHING; 149 return Token::null(); 150 } break; 151 default: 152 if ((buf[0] >= '0' && buf[0] <= '9') || (buf[0] == '-')) { 153 result = buf[0]; 154 state = S_INT; 155 } else { 156 throw JSONError(env, errLocation(), "unexpected token `" + string(1, buf[0]) + "'"); 157 } 158 break; 159 } 160 break; 161 case S_STRING_ESCAPE: 162 switch (buf[0]) { 163 case 'n': 164 result += "\n"; 165 break; 166 case 't': 167 result += "\t"; 168 break; 169 case '"': 170 result += "\""; 171 break; 172 case '\\': 173 result += "\\"; 174 break; 175 default: 176 result += "\\"; 177 result += buf[0]; 178 break; 179 } 180 state = S_STRING; 181 break; 182 case S_STRING: 183 if (buf[0] == '"') { 184 state = S_NOTHING; 185 return Token(result); 186 } 187 if (buf[0] == '\\') { 188 state = S_STRING_ESCAPE; 189 } else { 190 result += buf[0]; 191 } 192 break; 193 case S_INT: 194 if (buf[0] == '.') { 195 result += buf[0]; 196 state = S_FLOAT; 197 } else if (buf[0] >= '0' && buf[0] <= '9') { 198 result += buf[0]; 199 } else { 200 is.unget(); 201 std::istringstream iss(result); 202 int v; 203 iss >> v; 204 state = S_NOTHING; 205 return Token(v); 206 } 207 break; 208 case S_FLOAT: 209 if (buf[0] >= '0' && buf[0] <= '9') { 210 result += buf[0]; 211 } else { 212 is.unget(); 213 std::istringstream iss(result); 214 double v; 215 iss >> v; 216 state = S_NOTHING; 217 return Token(v); 218 } 219 break; 220 } 221 } 222 throw JSONError(env, errLocation(), "unexpected token `" + string(result) + "'"); 223} 224 225void JSONParser::expectToken(istream& is, JSONParser::TokenT t) { 226 Token rt = readToken(is); 227 if (rt.t != t) { 228 throw JSONError(env, errLocation(), "unexpected token"); 229 } 230} 231 232string JSONParser::expectString(istream& is) { 233 Token rt = readToken(is); 234 if (rt.t != T_STRING) { 235 throw JSONError(env, errLocation(), "unexpected token, expected string"); 236 } 237 return rt.s; 238} 239 240JSONParser::Token JSONParser::parseEnumString(istream& is) { 241 Token next = readToken(is); 242 if (next.t != T_STRING) { 243 throw JSONError(env, errLocation(), "invalid enum object"); 244 } 245 if (next.s.empty()) { 246 throw JSONError(env, errLocation(), "invalid enum identifier"); 247 } 248 size_t nonIdChar = 249 next.s.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"); 250 size_t nonIdBegin = next.s.find_first_of("0123456789_"); 251 if (nonIdChar != std::string::npos || nonIdBegin == 0) { 252 next.s = "'" + next.s + "'"; 253 } 254 return next; 255} 256 257Expression* JSONParser::parseObject(istream& is) { 258 // precondition: found T_OBJ_OPEN 259 Token objid = readToken(is); 260 if (objid.t != T_STRING) throw JSONError(env, errLocation(), "invalid object"); 261 expectToken(is, T_COLON); 262 if (objid.s == "set") { 263 expectToken(is, T_LIST_OPEN); 264 vector<Token> elems; 265 TokenT listT = T_COLON; // dummy marker 266 for (Token next = readToken(is); next.t != T_LIST_CLOSE; next = readToken(is)) { 267 switch (next.t) { 268 case T_COMMA: 269 break; 270 case T_INT: 271 if (listT == T_STRING || listT == T_OBJ_OPEN) 272 throw JSONError(env, errLocation(), "invalid set literal"); 273 if (listT != T_FLOAT) listT = T_INT; 274 elems.push_back(next); 275 break; 276 case T_FLOAT: 277 if (listT == T_STRING || listT == T_OBJ_OPEN) 278 throw JSONError(env, errLocation(), "invalid set literal"); 279 listT = T_FLOAT; 280 elems.push_back(next); 281 break; 282 case T_STRING: 283 if (listT != T_COLON && listT != T_STRING) 284 throw JSONError(env, errLocation(), "invalid set literal"); 285 listT = T_STRING; 286 elems.push_back(next); 287 break; 288 case T_BOOL: 289 if (listT == T_STRING || listT == T_OBJ_OPEN) 290 throw JSONError(env, errLocation(), "invalid set literal"); 291 if (listT == T_COLON) listT = T_BOOL; 292 elems.push_back(next); 293 break; 294 case T_OBJ_OPEN: { 295 if (listT != T_COLON && listT != T_OBJ_OPEN) 296 throw JSONError(env, errLocation(), "invalid set literal"); 297 listT = T_OBJ_OPEN; 298 Token enumid = readToken(is); 299 if (enumid.t != T_STRING || enumid.s != "e") 300 throw JSONError(env, errLocation(), "invalid enum object"); 301 expectToken(is, T_COLON); 302 Token next = parseEnumString(is); 303 expectToken(is, T_OBJ_CLOSE); 304 elems.push_back(next); 305 break; 306 } 307 default: 308 throw JSONError(env, errLocation(), "invalid set literal"); 309 } 310 } 311 expectToken(is, T_OBJ_CLOSE); 312 vector<Expression*> elems_e(elems.size()); 313 switch (listT) { 314 case T_COLON: 315 break; 316 case T_BOOL: 317 for (unsigned int i = 0; i < elems.size(); i++) { 318 elems_e[i] = new BoolLit(Location().introduce(), elems[i].b); 319 } 320 break; 321 case T_INT: 322 for (unsigned int i = 0; i < elems.size(); i++) { 323 elems_e[i] = IntLit::a(elems[i].i); 324 } 325 break; 326 case T_FLOAT: 327 for (unsigned int i = 0; i < elems.size(); i++) { 328 elems_e[i] = FloatLit::a(elems[i].d); 329 } 330 break; 331 case T_STRING: 332 for (unsigned int i = 0; i < elems.size(); i++) { 333 elems_e[i] = new StringLit(Location().introduce(), elems[i].s); 334 } 335 break; 336 case T_OBJ_OPEN: 337 for (unsigned int i = 0; i < elems.size(); i++) { 338 elems_e[i] = new Id(Location().introduce(), ASTString(elems[i].s), NULL); 339 } 340 break; 341 default: 342 break; 343 } 344 return new SetLit(Location().introduce(), elems_e); 345 } else if (objid.s == "e") { 346 Token next = parseEnumString(is); 347 expectToken(is, T_OBJ_CLOSE); 348 return new Id(Location().introduce(), ASTString(next.s), NULL); 349 } else { 350 throw JSONError(env, errLocation(), "invalid object"); 351 } 352} 353 354ArrayLit* JSONParser::parseArray(std::istream& is) { 355 // precondition: opening parenthesis has been read 356 vector<Expression*> exps; 357 vector<pair<int, int> > dims; 358 dims.emplace_back(1, 0); 359 vector<bool> hadDim; 360 hadDim.push_back(false); 361 Token next; 362 for (;;) { 363 next = readToken(is); 364 if (next.t != T_LIST_OPEN) break; 365 dims.emplace_back(1, 0); 366 hadDim.push_back(false); 367 } 368 int curDim = static_cast<int>(dims.size()) - 1; 369 for (;;) { 370 switch (next.t) { 371 case T_LIST_CLOSE: 372 hadDim[curDim] = true; 373 curDim--; 374 if (curDim < 0) { 375 goto list_done; 376 } else if (!hadDim[curDim]) { 377 dims[curDim].second++; 378 } 379 break; 380 case T_LIST_OPEN: 381 curDim++; 382 break; 383 case T_COMMA: 384 break; 385 case T_INT: 386 if (!hadDim[curDim]) { 387 dims[curDim].second++; 388 } 389 exps.push_back(IntLit::a(next.i)); 390 break; 391 case T_FLOAT: 392 if (!hadDim[curDim]) { 393 dims[curDim].second++; 394 } 395 exps.push_back(FloatLit::a(next.d)); 396 break; 397 case T_STRING: 398 if (!hadDim[curDim]) { 399 dims[curDim].second++; 400 } 401 exps.push_back(new StringLit(Location().introduce(), next.s)); 402 break; 403 case T_BOOL: 404 if (!hadDim[curDim]) { 405 dims[curDim].second++; 406 } 407 exps.push_back(new BoolLit(Location().introduce(), next.b)); 408 break; 409 case T_NULL: 410 if (!hadDim[curDim]) { 411 dims[curDim].second++; 412 } 413 exps.push_back(constants().absent); 414 break; 415 case T_OBJ_OPEN: 416 if (!hadDim[curDim]) { 417 dims[curDim].second++; 418 } 419 exps.push_back(parseObject(is)); 420 break; 421 default: 422 throw JSONError(env, errLocation(), "cannot parse JSON file"); 423 break; 424 } 425 next = readToken(is); 426 } 427list_done: 428 unsigned int expectedSize = 1; 429 for (auto& d : dims) { 430 expectedSize *= d.second; 431 } 432 if (exps.size() != expectedSize) { 433 throw JSONError(env, errLocation(), "mismatch in array dimensions"); 434 /// TODO: check each individual sub-array 435 } 436 return new ArrayLit(Location().introduce(), exps, dims); 437} 438 439Expression* JSONParser::parseExp(std::istream& is) { 440 Token next = readToken(is); 441 switch (next.t) { 442 case T_INT: 443 return IntLit::a(next.i); 444 break; 445 case T_FLOAT: 446 return FloatLit::a(next.d); 447 case T_STRING: 448 return new StringLit(Location().introduce(), next.s); 449 case T_BOOL: 450 return new BoolLit(Location().introduce(), next.b); 451 case T_NULL: 452 return constants().absent; 453 case T_OBJ_OPEN: 454 return parseObject(is); 455 case T_LIST_OPEN: 456 return parseArray(is); 457 default: 458 throw JSONError(env, errLocation(), "cannot parse JSON file"); 459 break; 460 } 461} 462 463void JSONParser::parse(Model* m, std::istream& is) { 464 line = 0; 465 column = 0; 466 expectToken(is, T_OBJ_OPEN); 467 for (;;) { 468 string ident = expectString(is); 469 expectToken(is, T_COLON); 470 Expression* e = parseExp(is); 471 if (ident[0] != '_') { 472 AssignI* ai = new AssignI(Location().introduce(), ident, e); 473 m->addItem(ai); 474 } 475 Token next = readToken(is); 476 if (next.t == T_OBJ_CLOSE) break; 477 if (next.t != T_COMMA) throw JSONError(env, errLocation(), "cannot parse JSON file"); 478 } 479} 480 481void JSONParser::parse(Model* m, const std::string& filename0) { 482 filename = filename0; 483 ifstream is; 484 is.open(filename, ios::in); 485 if (!is.good()) { 486 throw JSONError(env, Location().introduce(), "cannot open file " + filename); 487 } 488 parse(m, is); 489} 490 491void JSONParser::parseFromString(Model* m, const std::string& data) { 492 istringstream iss(data); 493 line = 0; 494 column = 0; 495 parse(m, iss); 496} 497 498namespace { 499bool isJSON(std::istream& is) { 500 while (is.good()) { 501 char c = is.get(); 502 if (c == '{') return true; 503 if (c != ' ' && c != '\n' && c != '\t' && c != '\r') return false; 504 } 505 return false; 506} 507} // namespace 508 509bool JSONParser::stringIsJSON(const std::string& data) { 510 std::istringstream iss(data); 511 return isJSON(iss); 512} 513 514bool JSONParser::fileIsJSON(const std::string& filename) { 515 ifstream is; 516 is.open(filename, ios::in); 517 return isJSON(is); 518} 519 520} // namespace MiniZinc