File:  [Qemu by Fabrice Bellard] / qemu / json-parser.c
Revision 1.1.1.5 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 18:33:39 2018 UTC (3 years, 1 month ago) by root
Branches: qemu, MAIN
CVS tags: qemu0150, qemu0141, qemu0140, HEAD
qemu 0.14.0

    1: /*
    2:  * JSON Parser 
    3:  *
    4:  * Copyright IBM, Corp. 2009
    5:  *
    6:  * Authors:
    7:  *  Anthony Liguori   <aliguori@us.ibm.com>
    8:  *
    9:  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
   10:  * See the COPYING.LIB file in the top-level directory.
   11:  *
   12:  */
   13: 
   14: #include <stdarg.h>
   15: 
   16: #include "qemu-common.h"
   17: #include "qstring.h"
   18: #include "qint.h"
   19: #include "qdict.h"
   20: #include "qlist.h"
   21: #include "qfloat.h"
   22: #include "qbool.h"
   23: #include "json-parser.h"
   24: #include "json-lexer.h"
   25: 
/* Parser state handed to every parsing routine; it currently has no members. */
typedef struct JSONParserContext
{
} JSONParserContext;
   29: 
/* Assert that cond is false. */
#define BUG_ON(cond) assert(!(cond))
   31: 
   32: /**
   33:  * TODO
   34:  *
   35:  * 0) make errors meaningful again
   36:  * 1) add geometry information to tokens
   37:  * 3) should we return a parsed size?
   38:  * 4) deal with premature EOI
   39:  */
   40: 
   41: static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
   42: 
   43: /**
   44:  * Token manipulators
   45:  *
   46:  * tokens are dictionaries that contain a type, a string value, and geometry information
   47:  * about a token identified by the lexer.  These are routines that make working with
   48:  * these objects a bit easier.
   49:  */
   50: static const char *token_get_value(QObject *obj)
   51: {
   52:     return qdict_get_str(qobject_to_qdict(obj), "token");
   53: }
   54: 
   55: static JSONTokenType token_get_type(QObject *obj)
   56: {
   57:     return qdict_get_int(qobject_to_qdict(obj), "type");
   58: }
   59: 
   60: static int token_is_operator(QObject *obj, char op)
   61: {
   62:     const char *val;
   63: 
   64:     if (token_get_type(obj) != JSON_OPERATOR) {
   65:         return 0;
   66:     }
   67: 
   68:     val = token_get_value(obj);
   69: 
   70:     return (val[0] == op) && (val[1] == 0);
   71: }
   72: 
   73: static int token_is_keyword(QObject *obj, const char *value)
   74: {
   75:     if (token_get_type(obj) != JSON_KEYWORD) {
   76:         return 0;
   77:     }
   78: 
   79:     return strcmp(token_get_value(obj), value) == 0;
   80: }
   81: 
   82: static int token_is_escape(QObject *obj, const char *value)
   83: {
   84:     if (token_get_type(obj) != JSON_ESCAPE) {
   85:         return 0;
   86:     }
   87: 
   88:     return (strcmp(token_get_value(obj), value) == 0);
   89: }
   90: 
   91: /**
   92:  * Error handler
   93:  */
   94: static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
   95:                                            QObject *token, const char *msg, ...)
   96: {
   97:     va_list ap;
   98:     va_start(ap, msg);
   99:     fprintf(stderr, "parse error: ");
  100:     vfprintf(stderr, msg, ap);
  101:     fprintf(stderr, "\n");
  102:     va_end(ap);
  103: }
  104: 
  105: /**
  106:  * String helpers
  107:  *
  108:  * These helpers are used to unescape strings.
  109:  */
  110: static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
  111: {
  112:     if (wchar <= 0x007F) {
  113:         BUG_ON(buffer_length < 2);
  114: 
  115:         buffer[0] = wchar & 0x7F;
  116:         buffer[1] = 0;
  117:     } else if (wchar <= 0x07FF) {
  118:         BUG_ON(buffer_length < 3);
  119: 
  120:         buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
  121:         buffer[1] = 0x80 | (wchar & 0x3F);
  122:         buffer[2] = 0;
  123:     } else {
  124:         BUG_ON(buffer_length < 4);
  125: 
  126:         buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
  127:         buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
  128:         buffer[2] = 0x80 | (wchar & 0x3F);
  129:         buffer[3] = 0;
  130:     }
  131: }
  132: 
  133: static int hex2decimal(char ch)
  134: {
  135:     if (ch >= '0' && ch <= '9') {
  136:         return (ch - '0');
  137:     } else if (ch >= 'a' && ch <= 'f') {
  138:         return 10 + (ch - 'a');
  139:     } else if (ch >= 'A' && ch <= 'F') {
  140:         return 10 + (ch - 'A');
  141:     }
  142: 
  143:     return -1;
  144: }
  145: 
  146: /**
  147:  * parse_string(): Parse a json string and return a QObject
  148:  *
  149:  *  string
  150:  *      ""
  151:  *      " chars "
  152:  *  chars
  153:  *      char
  154:  *      char chars
  155:  *  char
  156:  *      any-Unicode-character-
  157:  *          except-"-or-\-or-
  158:  *          control-character
  159:  *      \"
  160:  *      \\
  161:  *      \/
  162:  *      \b
  163:  *      \f
  164:  *      \n
  165:  *      \r
  166:  *      \t
  167:  *      \u four-hex-digits 
  168:  */
  169: static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
  170: {
  171:     const char *ptr = token_get_value(token);
  172:     QString *str;
  173:     int double_quote = 1;
  174: 
  175:     if (*ptr == '"') {
  176:         double_quote = 1;
  177:     } else {
  178:         double_quote = 0;
  179:     }
  180:     ptr++;
  181: 
  182:     str = qstring_new();
  183:     while (*ptr && 
  184:            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
  185:         if (*ptr == '\\') {
  186:             ptr++;
  187: 
  188:             switch (*ptr) {
  189:             case '"':
  190:                 qstring_append(str, "\"");
  191:                 ptr++;
  192:                 break;
  193:             case '\'':
  194:                 qstring_append(str, "'");
  195:                 ptr++;
  196:                 break;
  197:             case '\\':
  198:                 qstring_append(str, "\\");
  199:                 ptr++;
  200:                 break;
  201:             case '/':
  202:                 qstring_append(str, "/");
  203:                 ptr++;
  204:                 break;
  205:             case 'b':
  206:                 qstring_append(str, "\b");
  207:                 ptr++;
  208:                 break;
  209:             case 'f':
  210:                 qstring_append(str, "\f");
  211:                 ptr++;
  212:                 break;
  213:             case 'n':
  214:                 qstring_append(str, "\n");
  215:                 ptr++;
  216:                 break;
  217:             case 'r':
  218:                 qstring_append(str, "\r");
  219:                 ptr++;
  220:                 break;
  221:             case 't':
  222:                 qstring_append(str, "\t");
  223:                 ptr++;
  224:                 break;
  225:             case 'u': {
  226:                 uint16_t unicode_char = 0;
  227:                 char utf8_char[4];
  228:                 int i = 0;
  229: 
  230:                 ptr++;
  231: 
  232:                 for (i = 0; i < 4; i++) {
  233:                     if (qemu_isxdigit(*ptr)) {
  234:                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
  235:                     } else {
  236:                         parse_error(ctxt, token,
  237:                                     "invalid hex escape sequence in string");
  238:                         goto out;
  239:                     }
  240:                     ptr++;
  241:                 }
  242: 
  243:                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
  244:                 qstring_append(str, utf8_char);
  245:             }   break;
  246:             default:
  247:                 parse_error(ctxt, token, "invalid escape sequence in string");
  248:                 goto out;
  249:             }
  250:         } else {
  251:             char dummy[2];
  252: 
  253:             dummy[0] = *ptr++;
  254:             dummy[1] = 0;
  255: 
  256:             qstring_append(str, dummy);
  257:         }
  258:     }
  259: 
  260:     return str;
  261: 
  262: out:
  263:     QDECREF(str);
  264:     return NULL;
  265: }
  266: 
  267: /**
  268:  * Parsing rules
  269:  */
  270: static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
  271: {
  272:     QObject *key, *token = NULL, *value, *peek;
  273:     QList *working = qlist_copy(*tokens);
  274: 
  275:     peek = qlist_peek(working);
  276:     key = parse_value(ctxt, &working, ap);
  277:     if (!key || qobject_type(key) != QTYPE_QSTRING) {
  278:         parse_error(ctxt, peek, "key is not a string in object");
  279:         goto out;
  280:     }
  281: 
  282:     token = qlist_pop(working);
  283:     if (!token_is_operator(token, ':')) {
  284:         parse_error(ctxt, token, "missing : in object pair");
  285:         goto out;
  286:     }
  287: 
  288:     value = parse_value(ctxt, &working, ap);
  289:     if (value == NULL) {
  290:         parse_error(ctxt, token, "Missing value in dict");
  291:         goto out;
  292:     }
  293: 
  294:     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
  295: 
  296:     qobject_decref(token);
  297:     qobject_decref(key);
  298:     QDECREF(*tokens);
  299:     *tokens = working;
  300: 
  301:     return 0;
  302: 
  303: out:
  304:     qobject_decref(token);
  305:     qobject_decref(key);
  306:     QDECREF(working);
  307: 
  308:     return -1;
  309: }
  310: 
  311: static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  312: {
  313:     QDict *dict = NULL;
  314:     QObject *token, *peek;
  315:     QList *working = qlist_copy(*tokens);
  316: 
  317:     token = qlist_pop(working);
  318:     if (!token_is_operator(token, '{')) {
  319:         goto out;
  320:     }
  321:     qobject_decref(token);
  322:     token = NULL;
  323: 
  324:     dict = qdict_new();
  325: 
  326:     peek = qlist_peek(working);
  327:     if (!token_is_operator(peek, '}')) {
  328:         if (parse_pair(ctxt, dict, &working, ap) == -1) {
  329:             goto out;
  330:         }
  331: 
  332:         token = qlist_pop(working);
  333:         while (!token_is_operator(token, '}')) {
  334:             if (!token_is_operator(token, ',')) {
  335:                 parse_error(ctxt, token, "expected separator in dict");
  336:                 goto out;
  337:             }
  338:             qobject_decref(token);
  339:             token = NULL;
  340: 
  341:             if (parse_pair(ctxt, dict, &working, ap) == -1) {
  342:                 goto out;
  343:             }
  344: 
  345:             token = qlist_pop(working);
  346:         }
  347:         qobject_decref(token);
  348:         token = NULL;
  349:     } else {
  350:         token = qlist_pop(working);
  351:         qobject_decref(token);
  352:         token = NULL;
  353:     }
  354: 
  355:     QDECREF(*tokens);
  356:     *tokens = working;
  357: 
  358:     return QOBJECT(dict);
  359: 
  360: out:
  361:     qobject_decref(token);
  362:     QDECREF(working);
  363:     QDECREF(dict);
  364:     return NULL;
  365: }
  366: 
  367: static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  368: {
  369:     QList *list = NULL;
  370:     QObject *token, *peek;
  371:     QList *working = qlist_copy(*tokens);
  372: 
  373:     token = qlist_pop(working);
  374:     if (!token_is_operator(token, '[')) {
  375:         goto out;
  376:     }
  377:     qobject_decref(token);
  378:     token = NULL;
  379: 
  380:     list = qlist_new();
  381: 
  382:     peek = qlist_peek(working);
  383:     if (!token_is_operator(peek, ']')) {
  384:         QObject *obj;
  385: 
  386:         obj = parse_value(ctxt, &working, ap);
  387:         if (obj == NULL) {
  388:             parse_error(ctxt, token, "expecting value");
  389:             goto out;
  390:         }
  391: 
  392:         qlist_append_obj(list, obj);
  393: 
  394:         token = qlist_pop(working);
  395:         while (!token_is_operator(token, ']')) {
  396:             if (!token_is_operator(token, ',')) {
  397:                 parse_error(ctxt, token, "expected separator in list");
  398:                 goto out;
  399:             }
  400: 
  401:             qobject_decref(token);
  402:             token = NULL;
  403: 
  404:             obj = parse_value(ctxt, &working, ap);
  405:             if (obj == NULL) {
  406:                 parse_error(ctxt, token, "expecting value");
  407:                 goto out;
  408:             }
  409: 
  410:             qlist_append_obj(list, obj);
  411: 
  412:             token = qlist_pop(working);
  413:         }
  414: 
  415:         qobject_decref(token);
  416:         token = NULL;
  417:     } else {
  418:         token = qlist_pop(working);
  419:         qobject_decref(token);
  420:         token = NULL;
  421:     }
  422: 
  423:     QDECREF(*tokens);
  424:     *tokens = working;
  425: 
  426:     return QOBJECT(list);
  427: 
  428: out:
  429:     qobject_decref(token);
  430:     QDECREF(working);
  431:     QDECREF(list);
  432:     return NULL;
  433: }
  434: 
  435: static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
  436: {
  437:     QObject *token, *ret;
  438:     QList *working = qlist_copy(*tokens);
  439: 
  440:     token = qlist_pop(working);
  441: 
  442:     if (token_get_type(token) != JSON_KEYWORD) {
  443:         goto out;
  444:     }
  445: 
  446:     if (token_is_keyword(token, "true")) {
  447:         ret = QOBJECT(qbool_from_int(true));
  448:     } else if (token_is_keyword(token, "false")) {
  449:         ret = QOBJECT(qbool_from_int(false));
  450:     } else {
  451:         parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
  452:         goto out;
  453:     }
  454: 
  455:     qobject_decref(token);
  456:     QDECREF(*tokens);
  457:     *tokens = working;
  458: 
  459:     return ret;
  460: 
  461: out: 
  462:     qobject_decref(token);
  463:     QDECREF(working);
  464: 
  465:     return NULL;
  466: }
  467: 
  468: static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  469: {
  470:     QObject *token = NULL, *obj;
  471:     QList *working = qlist_copy(*tokens);
  472: 
  473:     if (ap == NULL) {
  474:         goto out;
  475:     }
  476: 
  477:     token = qlist_pop(working);
  478: 
  479:     if (token_is_escape(token, "%p")) {
  480:         obj = va_arg(*ap, QObject *);
  481:     } else if (token_is_escape(token, "%i")) {
  482:         obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
  483:     } else if (token_is_escape(token, "%d")) {
  484:         obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
  485:     } else if (token_is_escape(token, "%ld")) {
  486:         obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
  487:     } else if (token_is_escape(token, "%lld") ||
  488:                token_is_escape(token, "%I64d")) {
  489:         obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
  490:     } else if (token_is_escape(token, "%s")) {
  491:         obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
  492:     } else if (token_is_escape(token, "%f")) {
  493:         obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
  494:     } else {
  495:         goto out;
  496:     }
  497: 
  498:     qobject_decref(token);
  499:     QDECREF(*tokens);
  500:     *tokens = working;
  501: 
  502:     return obj;
  503: 
  504: out:
  505:     qobject_decref(token);
  506:     QDECREF(working);
  507: 
  508:     return NULL;
  509: }
  510: 
  511: static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
  512: {
  513:     QObject *token, *obj;
  514:     QList *working = qlist_copy(*tokens);
  515: 
  516:     token = qlist_pop(working);
  517:     switch (token_get_type(token)) {
  518:     case JSON_STRING:
  519:         obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
  520:         break;
  521:     case JSON_INTEGER:
  522:         obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
  523:         break;
  524:     case JSON_FLOAT:
  525:         /* FIXME dependent on locale */
  526:         obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
  527:         break;
  528:     default:
  529:         goto out;
  530:     }
  531: 
  532:     qobject_decref(token);
  533:     QDECREF(*tokens);
  534:     *tokens = working;
  535: 
  536:     return obj;
  537: 
  538: out:
  539:     qobject_decref(token);
  540:     QDECREF(working);
  541: 
  542:     return NULL;
  543: }
  544: 
  545: static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  546: {
  547:     QObject *obj;
  548: 
  549:     obj = parse_object(ctxt, tokens, ap);
  550:     if (obj == NULL) {
  551:         obj = parse_array(ctxt, tokens, ap);
  552:     }
  553:     if (obj == NULL) {
  554:         obj = parse_escape(ctxt, tokens, ap);
  555:     }
  556:     if (obj == NULL) {
  557:         obj = parse_keyword(ctxt, tokens);
  558:     } 
  559:     if (obj == NULL) {
  560:         obj = parse_literal(ctxt, tokens);
  561:     }
  562: 
  563:     return obj;
  564: }
  565: 
  566: QObject *json_parser_parse(QList *tokens, va_list *ap)
  567: {
  568:     JSONParserContext ctxt = {};
  569:     QList *working = qlist_copy(tokens);
  570:     QObject *result;
  571: 
  572:     result = parse_value(&ctxt, &working, ap);
  573: 
  574:     QDECREF(working);
  575: 
  576:     return result;
  577: }

unix.superglobalmegacorp.com