File:  [Qemu by Fabrice Bellard] / qemu / json-parser.c
Revision 1.1.1.6 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 18:55:57 2018 UTC (3 years, 5 months ago) by root
Branches: qemu, MAIN
CVS tags: qemu1101, qemu1001, qemu1000, qemu0151, HEAD
qemu 0.15.1

    1: /*
    2:  * JSON Parser 
    3:  *
    4:  * Copyright IBM, Corp. 2009
    5:  *
    6:  * Authors:
    7:  *  Anthony Liguori   <aliguori@us.ibm.com>
    8:  *
    9:  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
   10:  * See the COPYING.LIB file in the top-level directory.
   11:  *
   12:  */
   13: 
#include <errno.h>
#include <stdarg.h>

#include "qemu-common.h"
#include "qstring.h"
#include "qint.h"
#include "qdict.h"
#include "qlist.h"
#include "qfloat.h"
#include "qbool.h"
#include "json-parser.h"
#include "json-lexer.h"
#include "qerror.h"
   26: 
/**
 * Per-parse state threaded through every parsing routine in this file.
 */
typedef struct JSONParserContext
{
    /* Most recent parse error, or NULL if none; parse_error() replaces it. */
    Error *err;
} JSONParserContext;

/* Assert that an internal invariant holds: aborts via assert() when cond is true. */
#define BUG_ON(cond) assert(!(cond))
   33: 
   34: /**
   35:  * TODO
   36:  *
   37:  * 0) make errors meaningful again
   38:  * 1) add geometry information to tokens
   39:  * 3) should we return a parsed size?
   40:  * 4) deal with premature EOI
   41:  */
   42: 
/*
 * Forward declaration: parse_pair(), parse_object() and parse_array() call
 * parse_value(), which in turn calls parse_object() and parse_array().
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
   44: 
   45: /**
   46:  * Token manipulators
   47:  *
   48:  * tokens are dictionaries that contain a type, a string value, and geometry information
   49:  * about a token identified by the lexer.  These are routines that make working with
   50:  * these objects a bit easier.
   51:  */
   52: static const char *token_get_value(QObject *obj)
   53: {
   54:     return qdict_get_str(qobject_to_qdict(obj), "token");
   55: }
   56: 
   57: static JSONTokenType token_get_type(QObject *obj)
   58: {
   59:     return qdict_get_int(qobject_to_qdict(obj), "type");
   60: }
   61: 
   62: static int token_is_operator(QObject *obj, char op)
   63: {
   64:     const char *val;
   65: 
   66:     if (token_get_type(obj) != JSON_OPERATOR) {
   67:         return 0;
   68:     }
   69: 
   70:     val = token_get_value(obj);
   71: 
   72:     return (val[0] == op) && (val[1] == 0);
   73: }
   74: 
   75: static int token_is_keyword(QObject *obj, const char *value)
   76: {
   77:     if (token_get_type(obj) != JSON_KEYWORD) {
   78:         return 0;
   79:     }
   80: 
   81:     return strcmp(token_get_value(obj), value) == 0;
   82: }
   83: 
   84: static int token_is_escape(QObject *obj, const char *value)
   85: {
   86:     if (token_get_type(obj) != JSON_ESCAPE) {
   87:         return 0;
   88:     }
   89: 
   90:     return (strcmp(token_get_value(obj), value) == 0);
   91: }
   92: 
   93: /**
   94:  * Error handler
   95:  */
   96: static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
   97:                                            QObject *token, const char *msg, ...)
   98: {
   99:     va_list ap;
  100:     char message[1024];
  101:     va_start(ap, msg);
  102:     vsnprintf(message, sizeof(message), msg, ap);
  103:     va_end(ap);
  104:     if (ctxt->err) {
  105:         error_free(ctxt->err);
  106:         ctxt->err = NULL;
  107:     }
  108:     error_set(&ctxt->err, QERR_JSON_PARSE_ERROR, message);
  109: }
  110: 
  111: /**
  112:  * String helpers
  113:  *
  114:  * These helpers are used to unescape strings.
  115:  */
  116: static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
  117: {
  118:     if (wchar <= 0x007F) {
  119:         BUG_ON(buffer_length < 2);
  120: 
  121:         buffer[0] = wchar & 0x7F;
  122:         buffer[1] = 0;
  123:     } else if (wchar <= 0x07FF) {
  124:         BUG_ON(buffer_length < 3);
  125: 
  126:         buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
  127:         buffer[1] = 0x80 | (wchar & 0x3F);
  128:         buffer[2] = 0;
  129:     } else {
  130:         BUG_ON(buffer_length < 4);
  131: 
  132:         buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
  133:         buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
  134:         buffer[2] = 0x80 | (wchar & 0x3F);
  135:         buffer[3] = 0;
  136:     }
  137: }
  138: 
  139: static int hex2decimal(char ch)
  140: {
  141:     if (ch >= '0' && ch <= '9') {
  142:         return (ch - '0');
  143:     } else if (ch >= 'a' && ch <= 'f') {
  144:         return 10 + (ch - 'a');
  145:     } else if (ch >= 'A' && ch <= 'F') {
  146:         return 10 + (ch - 'A');
  147:     }
  148: 
  149:     return -1;
  150: }
  151: 
  152: /**
  153:  * parse_string(): Parse a json string and return a QObject
  154:  *
  155:  *  string
  156:  *      ""
  157:  *      " chars "
  158:  *  chars
  159:  *      char
  160:  *      char chars
  161:  *  char
  162:  *      any-Unicode-character-
  163:  *          except-"-or-\-or-
  164:  *          control-character
  165:  *      \"
  166:  *      \\
  167:  *      \/
  168:  *      \b
  169:  *      \f
  170:  *      \n
  171:  *      \r
  172:  *      \t
  173:  *      \u four-hex-digits 
  174:  */
  175: static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
  176: {
  177:     const char *ptr = token_get_value(token);
  178:     QString *str;
  179:     int double_quote = 1;
  180: 
  181:     if (*ptr == '"') {
  182:         double_quote = 1;
  183:     } else {
  184:         double_quote = 0;
  185:     }
  186:     ptr++;
  187: 
  188:     str = qstring_new();
  189:     while (*ptr && 
  190:            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
  191:         if (*ptr == '\\') {
  192:             ptr++;
  193: 
  194:             switch (*ptr) {
  195:             case '"':
  196:                 qstring_append(str, "\"");
  197:                 ptr++;
  198:                 break;
  199:             case '\'':
  200:                 qstring_append(str, "'");
  201:                 ptr++;
  202:                 break;
  203:             case '\\':
  204:                 qstring_append(str, "\\");
  205:                 ptr++;
  206:                 break;
  207:             case '/':
  208:                 qstring_append(str, "/");
  209:                 ptr++;
  210:                 break;
  211:             case 'b':
  212:                 qstring_append(str, "\b");
  213:                 ptr++;
  214:                 break;
  215:             case 'f':
  216:                 qstring_append(str, "\f");
  217:                 ptr++;
  218:                 break;
  219:             case 'n':
  220:                 qstring_append(str, "\n");
  221:                 ptr++;
  222:                 break;
  223:             case 'r':
  224:                 qstring_append(str, "\r");
  225:                 ptr++;
  226:                 break;
  227:             case 't':
  228:                 qstring_append(str, "\t");
  229:                 ptr++;
  230:                 break;
  231:             case 'u': {
  232:                 uint16_t unicode_char = 0;
  233:                 char utf8_char[4];
  234:                 int i = 0;
  235: 
  236:                 ptr++;
  237: 
  238:                 for (i = 0; i < 4; i++) {
  239:                     if (qemu_isxdigit(*ptr)) {
  240:                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
  241:                     } else {
  242:                         parse_error(ctxt, token,
  243:                                     "invalid hex escape sequence in string");
  244:                         goto out;
  245:                     }
  246:                     ptr++;
  247:                 }
  248: 
  249:                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
  250:                 qstring_append(str, utf8_char);
  251:             }   break;
  252:             default:
  253:                 parse_error(ctxt, token, "invalid escape sequence in string");
  254:                 goto out;
  255:             }
  256:         } else {
  257:             char dummy[2];
  258: 
  259:             dummy[0] = *ptr++;
  260:             dummy[1] = 0;
  261: 
  262:             qstring_append(str, dummy);
  263:         }
  264:     }
  265: 
  266:     return str;
  267: 
  268: out:
  269:     QDECREF(str);
  270:     return NULL;
  271: }
  272: 
  273: /**
  274:  * Parsing rules
  275:  */
  276: static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
  277: {
  278:     QObject *key = NULL, *token = NULL, *value, *peek;
  279:     QList *working = qlist_copy(*tokens);
  280: 
  281:     peek = qlist_peek(working);
  282:     if (peek == NULL) {
  283:         parse_error(ctxt, NULL, "premature EOI");
  284:         goto out;
  285:     }
  286: 
  287:     key = parse_value(ctxt, &working, ap);
  288:     if (!key || qobject_type(key) != QTYPE_QSTRING) {
  289:         parse_error(ctxt, peek, "key is not a string in object");
  290:         goto out;
  291:     }
  292: 
  293:     token = qlist_pop(working);
  294:     if (token == NULL) {
  295:         parse_error(ctxt, NULL, "premature EOI");
  296:         goto out;
  297:     }
  298: 
  299:     if (!token_is_operator(token, ':')) {
  300:         parse_error(ctxt, token, "missing : in object pair");
  301:         goto out;
  302:     }
  303: 
  304:     value = parse_value(ctxt, &working, ap);
  305:     if (value == NULL) {
  306:         parse_error(ctxt, token, "Missing value in dict");
  307:         goto out;
  308:     }
  309: 
  310:     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
  311: 
  312:     qobject_decref(token);
  313:     qobject_decref(key);
  314:     QDECREF(*tokens);
  315:     *tokens = working;
  316: 
  317:     return 0;
  318: 
  319: out:
  320:     qobject_decref(token);
  321:     qobject_decref(key);
  322:     QDECREF(working);
  323: 
  324:     return -1;
  325: }
  326: 
  327: static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  328: {
  329:     QDict *dict = NULL;
  330:     QObject *token, *peek;
  331:     QList *working = qlist_copy(*tokens);
  332: 
  333:     token = qlist_pop(working);
  334:     if (token == NULL) {
  335:         goto out;
  336:     }
  337: 
  338:     if (!token_is_operator(token, '{')) {
  339:         goto out;
  340:     }
  341:     qobject_decref(token);
  342:     token = NULL;
  343: 
  344:     dict = qdict_new();
  345: 
  346:     peek = qlist_peek(working);
  347:     if (peek == NULL) {
  348:         parse_error(ctxt, NULL, "premature EOI");
  349:         goto out;
  350:     }
  351: 
  352:     if (!token_is_operator(peek, '}')) {
  353:         if (parse_pair(ctxt, dict, &working, ap) == -1) {
  354:             goto out;
  355:         }
  356: 
  357:         token = qlist_pop(working);
  358:         if (token == NULL) {
  359:             parse_error(ctxt, NULL, "premature EOI");
  360:             goto out;
  361:         }
  362: 
  363:         while (!token_is_operator(token, '}')) {
  364:             if (!token_is_operator(token, ',')) {
  365:                 parse_error(ctxt, token, "expected separator in dict");
  366:                 goto out;
  367:             }
  368:             qobject_decref(token);
  369:             token = NULL;
  370: 
  371:             if (parse_pair(ctxt, dict, &working, ap) == -1) {
  372:                 goto out;
  373:             }
  374: 
  375:             token = qlist_pop(working);
  376:             if (token == NULL) {
  377:                 parse_error(ctxt, NULL, "premature EOI");
  378:                 goto out;
  379:             }
  380:         }
  381:         qobject_decref(token);
  382:         token = NULL;
  383:     } else {
  384:         token = qlist_pop(working);
  385:         qobject_decref(token);
  386:         token = NULL;
  387:     }
  388: 
  389:     QDECREF(*tokens);
  390:     *tokens = working;
  391: 
  392:     return QOBJECT(dict);
  393: 
  394: out:
  395:     qobject_decref(token);
  396:     QDECREF(working);
  397:     QDECREF(dict);
  398:     return NULL;
  399: }
  400: 
  401: static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  402: {
  403:     QList *list = NULL;
  404:     QObject *token, *peek;
  405:     QList *working = qlist_copy(*tokens);
  406: 
  407:     token = qlist_pop(working);
  408:     if (token == NULL) {
  409:         goto out;
  410:     }
  411: 
  412:     if (!token_is_operator(token, '[')) {
  413:         goto out;
  414:     }
  415:     qobject_decref(token);
  416:     token = NULL;
  417: 
  418:     list = qlist_new();
  419: 
  420:     peek = qlist_peek(working);
  421:     if (peek == NULL) {
  422:         parse_error(ctxt, NULL, "premature EOI");
  423:         goto out;
  424:     }
  425: 
  426:     if (!token_is_operator(peek, ']')) {
  427:         QObject *obj;
  428: 
  429:         obj = parse_value(ctxt, &working, ap);
  430:         if (obj == NULL) {
  431:             parse_error(ctxt, token, "expecting value");
  432:             goto out;
  433:         }
  434: 
  435:         qlist_append_obj(list, obj);
  436: 
  437:         token = qlist_pop(working);
  438:         if (token == NULL) {
  439:             parse_error(ctxt, NULL, "premature EOI");
  440:             goto out;
  441:         }
  442: 
  443:         while (!token_is_operator(token, ']')) {
  444:             if (!token_is_operator(token, ',')) {
  445:                 parse_error(ctxt, token, "expected separator in list");
  446:                 goto out;
  447:             }
  448: 
  449:             qobject_decref(token);
  450:             token = NULL;
  451: 
  452:             obj = parse_value(ctxt, &working, ap);
  453:             if (obj == NULL) {
  454:                 parse_error(ctxt, token, "expecting value");
  455:                 goto out;
  456:             }
  457: 
  458:             qlist_append_obj(list, obj);
  459: 
  460:             token = qlist_pop(working);
  461:             if (token == NULL) {
  462:                 parse_error(ctxt, NULL, "premature EOI");
  463:                 goto out;
  464:             }
  465:         }
  466: 
  467:         qobject_decref(token);
  468:         token = NULL;
  469:     } else {
  470:         token = qlist_pop(working);
  471:         qobject_decref(token);
  472:         token = NULL;
  473:     }
  474: 
  475:     QDECREF(*tokens);
  476:     *tokens = working;
  477: 
  478:     return QOBJECT(list);
  479: 
  480: out:
  481:     qobject_decref(token);
  482:     QDECREF(working);
  483:     QDECREF(list);
  484:     return NULL;
  485: }
  486: 
  487: static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
  488: {
  489:     QObject *token, *ret;
  490:     QList *working = qlist_copy(*tokens);
  491: 
  492:     token = qlist_pop(working);
  493:     if (token == NULL) {
  494:         goto out;
  495:     }
  496: 
  497:     if (token_get_type(token) != JSON_KEYWORD) {
  498:         goto out;
  499:     }
  500: 
  501:     if (token_is_keyword(token, "true")) {
  502:         ret = QOBJECT(qbool_from_int(true));
  503:     } else if (token_is_keyword(token, "false")) {
  504:         ret = QOBJECT(qbool_from_int(false));
  505:     } else {
  506:         parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
  507:         goto out;
  508:     }
  509: 
  510:     qobject_decref(token);
  511:     QDECREF(*tokens);
  512:     *tokens = working;
  513: 
  514:     return ret;
  515: 
  516: out: 
  517:     qobject_decref(token);
  518:     QDECREF(working);
  519: 
  520:     return NULL;
  521: }
  522: 
  523: static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  524: {
  525:     QObject *token = NULL, *obj;
  526:     QList *working = qlist_copy(*tokens);
  527: 
  528:     if (ap == NULL) {
  529:         goto out;
  530:     }
  531: 
  532:     token = qlist_pop(working);
  533:     if (token == NULL) {
  534:         goto out;
  535:     }
  536: 
  537:     if (token_is_escape(token, "%p")) {
  538:         obj = va_arg(*ap, QObject *);
  539:     } else if (token_is_escape(token, "%i")) {
  540:         obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
  541:     } else if (token_is_escape(token, "%d")) {
  542:         obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
  543:     } else if (token_is_escape(token, "%ld")) {
  544:         obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
  545:     } else if (token_is_escape(token, "%lld") ||
  546:                token_is_escape(token, "%I64d")) {
  547:         obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
  548:     } else if (token_is_escape(token, "%s")) {
  549:         obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
  550:     } else if (token_is_escape(token, "%f")) {
  551:         obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
  552:     } else {
  553:         goto out;
  554:     }
  555: 
  556:     qobject_decref(token);
  557:     QDECREF(*tokens);
  558:     *tokens = working;
  559: 
  560:     return obj;
  561: 
  562: out:
  563:     qobject_decref(token);
  564:     QDECREF(working);
  565: 
  566:     return NULL;
  567: }
  568: 
  569: static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
  570: {
  571:     QObject *token, *obj;
  572:     QList *working = qlist_copy(*tokens);
  573: 
  574:     token = qlist_pop(working);
  575:     if (token == NULL) {
  576:         goto out;
  577:     }
  578: 
  579:     switch (token_get_type(token)) {
  580:     case JSON_STRING:
  581:         obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
  582:         break;
  583:     case JSON_INTEGER:
  584:         obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
  585:         break;
  586:     case JSON_FLOAT:
  587:         /* FIXME dependent on locale */
  588:         obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
  589:         break;
  590:     default:
  591:         goto out;
  592:     }
  593: 
  594:     qobject_decref(token);
  595:     QDECREF(*tokens);
  596:     *tokens = working;
  597: 
  598:     return obj;
  599: 
  600: out:
  601:     qobject_decref(token);
  602:     QDECREF(working);
  603: 
  604:     return NULL;
  605: }
  606: 
  607: static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  608: {
  609:     QObject *obj;
  610: 
  611:     obj = parse_object(ctxt, tokens, ap);
  612:     if (obj == NULL) {
  613:         obj = parse_array(ctxt, tokens, ap);
  614:     }
  615:     if (obj == NULL) {
  616:         obj = parse_escape(ctxt, tokens, ap);
  617:     }
  618:     if (obj == NULL) {
  619:         obj = parse_keyword(ctxt, tokens);
  620:     } 
  621:     if (obj == NULL) {
  622:         obj = parse_literal(ctxt, tokens);
  623:     }
  624: 
  625:     return obj;
  626: }
  627: 
/**
 * json_parser_parse(): parse a token list into a QObject
 *
 * Convenience wrapper around json_parser_parse_err() that passes NULL for
 * the error pointer, so no error object is handed back to the caller.
 */
QObject *json_parser_parse(QList *tokens, va_list *ap)
{
    return json_parser_parse_err(tokens, ap, NULL);
}
  632: 
  633: QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
  634: {
  635:     JSONParserContext ctxt = {};
  636:     QList *working;
  637:     QObject *result;
  638: 
  639:     if (!tokens) {
  640:         return NULL;
  641:     }
  642:     working = qlist_copy(tokens);
  643:     result = parse_value(&ctxt, &working, ap);
  644: 
  645:     QDECREF(working);
  646: 
  647:     error_propagate(errp, ctxt.err);
  648: 
  649:     return result;
  650: }

unix.superglobalmegacorp.com