File:  [Qemu by Fabrice Bellard] / qemu / json-lexer.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 18:23:35 2018 UTC (3 years, 1 month ago) by root
Branches: qemu, MAIN
CVS tags: qemu0150, qemu0141, qemu0140, qemu0130, HEAD
qemu 0.13.0

    1: /*
    2:  * JSON lexer
    3:  *
    4:  * Copyright IBM, Corp. 2009
    5:  *
    6:  * Authors:
    7:  *  Anthony Liguori   <aliguori@us.ibm.com>
    8:  *
    9:  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
   10:  * See the COPYING.LIB file in the top-level directory.
   11:  *
   12:  */
   13: 
   14: #include "qstring.h"
   15: #include "qlist.h"
   16: #include "qdict.h"
   17: #include "qint.h"
   18: #include "qemu-common.h"
   19: #include "json-lexer.h"
   20: 
   21: /*
   22:  * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
   23:  * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
   24:  * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
   25:  * [{}\[\],:]
   26:  * [a-z]+
   27:  *
   28:  */
   29: 
/*
 * States of the lexer's state machine.
 *
 * These values index the rows of the json_lexer[] transition table and are
 * also stored as its uint8_t entries, side by side with the JSON_* token
 * codes.  NOTE(review): the JSON_* codes are declared outside this file
 * (presumably json-lexer.h); they must not collide with the values below.
 *
 * The enumerators are positional, so do not reorder them casually.
 */
enum json_lexer_state {
    /* Must stay 0: table entries without an explicit initializer are
       zero-initialized and therefore mean "invalid input". */
    ERROR = 0,

    /* Double-quoted string.  After "\u" the lexer steps through
       UCODE0 -> UCODE1 -> UCODE2 -> UCODE3 -> IN_DQ_STRING, one hex digit
       per step. */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,    /* just saw a backslash inside "..." */
    IN_DQ_STRING,           /* inside "..." */

    /* Single-quoted string, same layout as the double-quoted states. */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,    /* just saw a backslash inside '...' */
    IN_SQ_STRING,           /* inside '...' */

    /* Numbers */
    IN_ZERO,                /* saw a leading '0' */
    IN_DIGITS,              /* exponent digits */
    IN_DIGIT,               /* after the exponent sign; a digit is required */
    IN_EXP_E,               /* just saw 'e' or 'E' */
    IN_MANTISSA,            /* just saw '.'; a digit is required */
    IN_MANTISSA_DIGITS,     /* fraction digits */
    IN_NONZERO_NUMBER,      /* integer part starting with 1-9 */
    IN_NEG_NONZERO_NUMBER,  /* just saw '-'; despite the name, '0' is
                               accepted here as well */

    IN_KEYWORD,             /* run of lowercase letters */

    /* '%' escapes */
    IN_ESCAPE,              /* just saw '%' */
    IN_ESCAPE_L,            /* saw "%l" */
    IN_ESCAPE_LL,           /* saw "%ll" */
    IN_ESCAPE_I,            /* saw "%I" */
    IN_ESCAPE_I6,           /* saw "%I6" */
    IN_ESCAPE_I64,          /* saw "%I64" */

    IN_WHITESPACE,          /* run of blanks, tabs, CR, LF */
    IN_START,               /* between tokens */
};
   62: 
/*
 * Row initializer: map every character in 0 .. 0x7F to STATE.
 *
 * This uses the GNU range-designator extension.  Designators that follow it
 * in the same row override individual characters, so it has to come first
 * in a row.  Characters 0x80 .. 0xFF are not covered and stay 0 (ERROR).
 */
#define TERMINAL(state) [0 ... 0x7F] = (state)

/* Return whether TERMINAL is a terminal state and the transition to it
   from OLD_STATE required lookahead.  This happens whenever the table
   below uses the TERMINAL macro.

   The test looks only at entry 0 of OLD_STATE's row.  A row that does not
   use the TERMINAL macro leaves that entry at 0 (ERROR), so for such a row
   this also evaluates to true when the second argument is ERROR.  */
#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
            (json_lexer[(old_state)][0] == (terminal))
   70: 
   71: static const uint8_t json_lexer[][256] =  {
   72:     /* double quote string */
   73:     [IN_DQ_UCODE3] = {
   74:         ['0' ... '9'] = IN_DQ_STRING,
   75:         ['a' ... 'f'] = IN_DQ_STRING,
   76:         ['A' ... 'F'] = IN_DQ_STRING,
   77:     },
   78:     [IN_DQ_UCODE2] = {
   79:         ['0' ... '9'] = IN_DQ_UCODE3,
   80:         ['a' ... 'f'] = IN_DQ_UCODE3,
   81:         ['A' ... 'F'] = IN_DQ_UCODE3,
   82:     },
   83:     [IN_DQ_UCODE1] = {
   84:         ['0' ... '9'] = IN_DQ_UCODE2,
   85:         ['a' ... 'f'] = IN_DQ_UCODE2,
   86:         ['A' ... 'F'] = IN_DQ_UCODE2,
   87:     },
   88:     [IN_DQ_UCODE0] = {
   89:         ['0' ... '9'] = IN_DQ_UCODE1,
   90:         ['a' ... 'f'] = IN_DQ_UCODE1,
   91:         ['A' ... 'F'] = IN_DQ_UCODE1,
   92:     },
   93:     [IN_DQ_STRING_ESCAPE] = {
   94:         ['b'] = IN_DQ_STRING,
   95:         ['f'] =  IN_DQ_STRING,
   96:         ['n'] =  IN_DQ_STRING,
   97:         ['r'] =  IN_DQ_STRING,
   98:         ['t'] =  IN_DQ_STRING,
   99:         ['/'] = IN_DQ_STRING,
  100:         ['\\'] = IN_DQ_STRING,
  101:         ['\''] = IN_DQ_STRING,
  102:         ['\"'] = IN_DQ_STRING,
  103:         ['u'] = IN_DQ_UCODE0,
  104:     },
  105:     [IN_DQ_STRING] = {
  106:         [1 ... 0xFF] = IN_DQ_STRING,
  107:         ['\\'] = IN_DQ_STRING_ESCAPE,
  108:         ['"'] = JSON_STRING,
  109:     },
  110: 
  111:     /* single quote string */
  112:     [IN_SQ_UCODE3] = {
  113:         ['0' ... '9'] = IN_SQ_STRING,
  114:         ['a' ... 'f'] = IN_SQ_STRING,
  115:         ['A' ... 'F'] = IN_SQ_STRING,
  116:     },
  117:     [IN_SQ_UCODE2] = {
  118:         ['0' ... '9'] = IN_SQ_UCODE3,
  119:         ['a' ... 'f'] = IN_SQ_UCODE3,
  120:         ['A' ... 'F'] = IN_SQ_UCODE3,
  121:     },
  122:     [IN_SQ_UCODE1] = {
  123:         ['0' ... '9'] = IN_SQ_UCODE2,
  124:         ['a' ... 'f'] = IN_SQ_UCODE2,
  125:         ['A' ... 'F'] = IN_SQ_UCODE2,
  126:     },
  127:     [IN_SQ_UCODE0] = {
  128:         ['0' ... '9'] = IN_SQ_UCODE1,
  129:         ['a' ... 'f'] = IN_SQ_UCODE1,
  130:         ['A' ... 'F'] = IN_SQ_UCODE1,
  131:     },
  132:     [IN_SQ_STRING_ESCAPE] = {
  133:         ['b'] = IN_SQ_STRING,
  134:         ['f'] =  IN_SQ_STRING,
  135:         ['n'] =  IN_SQ_STRING,
  136:         ['r'] =  IN_SQ_STRING,
  137:         ['t'] =  IN_SQ_STRING,
  138:         ['/'] = IN_DQ_STRING,
  139:         ['\\'] = IN_DQ_STRING,
  140:         ['\''] = IN_SQ_STRING,
  141:         ['\"'] = IN_SQ_STRING,
  142:         ['u'] = IN_SQ_UCODE0,
  143:     },
  144:     [IN_SQ_STRING] = {
  145:         [1 ... 0xFF] = IN_SQ_STRING,
  146:         ['\\'] = IN_SQ_STRING_ESCAPE,
  147:         ['\''] = JSON_STRING,
  148:     },
  149: 
  150:     /* Zero */
  151:     [IN_ZERO] = {
  152:         TERMINAL(JSON_INTEGER),
  153:         ['0' ... '9'] = ERROR,
  154:         ['.'] = IN_MANTISSA,
  155:     },
  156: 
  157:     /* Float */
  158:     [IN_DIGITS] = {
  159:         TERMINAL(JSON_FLOAT),
  160:         ['0' ... '9'] = IN_DIGITS,
  161:     },
  162: 
  163:     [IN_DIGIT] = {
  164:         ['0' ... '9'] = IN_DIGITS,
  165:     },
  166: 
  167:     [IN_EXP_E] = {
  168:         ['-'] = IN_DIGIT,
  169:         ['+'] = IN_DIGIT,
  170:         ['0' ... '9'] = IN_DIGITS,
  171:     },
  172: 
  173:     [IN_MANTISSA_DIGITS] = {
  174:         TERMINAL(JSON_FLOAT),
  175:         ['0' ... '9'] = IN_MANTISSA_DIGITS,
  176:         ['e'] = IN_EXP_E,
  177:         ['E'] = IN_EXP_E,
  178:     },
  179: 
  180:     [IN_MANTISSA] = {
  181:         ['0' ... '9'] = IN_MANTISSA_DIGITS,
  182:     },
  183: 
  184:     /* Number */
  185:     [IN_NONZERO_NUMBER] = {
  186:         TERMINAL(JSON_INTEGER),
  187:         ['0' ... '9'] = IN_NONZERO_NUMBER,
  188:         ['e'] = IN_EXP_E,
  189:         ['E'] = IN_EXP_E,
  190:         ['.'] = IN_MANTISSA,
  191:     },
  192: 
  193:     [IN_NEG_NONZERO_NUMBER] = {
  194:         ['0'] = IN_ZERO,
  195:         ['1' ... '9'] = IN_NONZERO_NUMBER,
  196:     },
  197: 
  198:     /* keywords */
  199:     [IN_KEYWORD] = {
  200:         TERMINAL(JSON_KEYWORD),
  201:         ['a' ... 'z'] = IN_KEYWORD,
  202:     },
  203: 
  204:     /* whitespace */
  205:     [IN_WHITESPACE] = {
  206:         TERMINAL(JSON_SKIP),
  207:         [' '] = IN_WHITESPACE,
  208:         ['\t'] = IN_WHITESPACE,
  209:         ['\r'] = IN_WHITESPACE,
  210:         ['\n'] = IN_WHITESPACE,
  211:     },        
  212: 
  213:     /* escape */
  214:     [IN_ESCAPE_LL] = {
  215:         ['d'] = JSON_ESCAPE,
  216:     },
  217: 
  218:     [IN_ESCAPE_L] = {
  219:         ['d'] = JSON_ESCAPE,
  220:         ['l'] = IN_ESCAPE_LL,
  221:     },
  222: 
  223:     [IN_ESCAPE_I64] = {
  224:         ['d'] = JSON_ESCAPE,
  225:     },
  226: 
  227:     [IN_ESCAPE_I6] = {
  228:         ['4'] = IN_ESCAPE_I64,
  229:     },
  230: 
  231:     [IN_ESCAPE_I] = {
  232:         ['6'] = IN_ESCAPE_I6,
  233:     },
  234: 
  235:     [IN_ESCAPE] = {
  236:         ['d'] = JSON_ESCAPE,
  237:         ['i'] = JSON_ESCAPE,
  238:         ['p'] = JSON_ESCAPE,
  239:         ['s'] = JSON_ESCAPE,
  240:         ['f'] = JSON_ESCAPE,
  241:         ['l'] = IN_ESCAPE_L,
  242:         ['I'] = IN_ESCAPE_I,
  243:     },
  244: 
  245:     /* top level rule */
  246:     [IN_START] = {
  247:         ['"'] = IN_DQ_STRING,
  248:         ['\''] = IN_SQ_STRING,
  249:         ['0'] = IN_ZERO,
  250:         ['1' ... '9'] = IN_NONZERO_NUMBER,
  251:         ['-'] = IN_NEG_NONZERO_NUMBER,
  252:         ['{'] = JSON_OPERATOR,
  253:         ['}'] = JSON_OPERATOR,
  254:         ['['] = JSON_OPERATOR,
  255:         [']'] = JSON_OPERATOR,
  256:         [','] = JSON_OPERATOR,
  257:         [':'] = JSON_OPERATOR,
  258:         ['a' ... 'z'] = IN_KEYWORD,
  259:         ['%'] = IN_ESCAPE,
  260:         [' '] = IN_WHITESPACE,
  261:         ['\t'] = IN_WHITESPACE,
  262:         ['\r'] = IN_WHITESPACE,
  263:         ['\n'] = IN_WHITESPACE,
  264:     },
  265: };
  266: 
  267: void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
  268: {
  269:     lexer->emit = func;
  270:     lexer->state = IN_START;
  271:     lexer->token = qstring_new();
  272:     lexer->x = lexer->y = 0;
  273: }
  274: 
  275: static int json_lexer_feed_char(JSONLexer *lexer, char ch)
  276: {
  277:     int char_consumed, new_state;
  278: 
  279:     lexer->x++;
  280:     if (ch == '\n') {
  281:         lexer->x = 0;
  282:         lexer->y++;
  283:     }
  284: 
  285:     do {
  286:         new_state = json_lexer[lexer->state][(uint8_t)ch];
  287:         char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
  288:         if (char_consumed) {
  289:             qstring_append_chr(lexer->token, ch);
  290:         }
  291: 
  292:         switch (new_state) {
  293:         case JSON_OPERATOR:
  294:         case JSON_ESCAPE:
  295:         case JSON_INTEGER:
  296:         case JSON_FLOAT:
  297:         case JSON_KEYWORD:
  298:         case JSON_STRING:
  299:             lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
  300:         case JSON_SKIP:
  301:             QDECREF(lexer->token);
  302:             lexer->token = qstring_new();
  303:             new_state = IN_START;
  304:             break;
  305:         case ERROR:
  306:             return -EINVAL;
  307:         default:
  308:             break;
  309:         }
  310:         lexer->state = new_state;
  311:     } while (!char_consumed);
  312:     return 0;
  313: }
  314: 
  315: int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
  316: {
  317:     size_t i;
  318: 
  319:     for (i = 0; i < size; i++) {
  320:         int err;
  321: 
  322:         err = json_lexer_feed_char(lexer, buffer[i]);
  323:         if (err < 0) {
  324:             return err;
  325:         }
  326:     }
  327: 
  328:     return 0;
  329: }
  330: 
  331: int json_lexer_flush(JSONLexer *lexer)
  332: {
  333:     return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0);
  334: }
  335: 
  336: void json_lexer_destroy(JSONLexer *lexer)
  337: {
  338:     QDECREF(lexer->token);
  339: }

unix.superglobalmegacorp.com