Annotation of qemu/json-lexer.c, revision 1.1.1.4

1.1       root        1: /*
                      2:  * JSON lexer
                      3:  *
                      4:  * Copyright IBM, Corp. 2009
                      5:  *
                      6:  * Authors:
                      7:  *  Anthony Liguori   <aliguori@us.ibm.com>
                      8:  *
                      9:  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
                     10:  * See the COPYING.LIB file in the top-level directory.
                     11:  *
                     12:  */
                     13: 
                     14: #include "qstring.h"
                     15: #include "qlist.h"
                     16: #include "qdict.h"
                     17: #include "qint.h"
                     18: #include "qemu-common.h"
                     19: #include "json-lexer.h"
                     20: 
/* Upper bound, in bytes, on the length of a single accumulated token (64 MiB). */
#define MAX_TOKEN_SIZE (64ULL * 1024 * 1024)
        !            22: 
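/*
 * Token grammar recognized by the state table below (regex notation):
 *
 *   double-quoted string:
 *     "([^\\"]|\\["'\\/bfnrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])*"
 *   single-quoted string:
 *     '([^\\']|\\["'\\/bfnrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])*'
 *   number:
 *     -?(0(\.[0-9]+([eE][-+]?[0-9]+)?)?|[1-9][0-9]*(\.[0-9]+)?([eE][-+]?[0-9]+)?)
 *   operator:
 *     [{}\[\],:]
 *   keyword:
 *     [a-z]+
 *   interpolation escape:
 *     %(d|i|p|s|f|ld|lld|I64d)
 *   whitespace (skipped):
 *     [ \t\r\n]+
 *
 */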
                     31: 
                     32: enum json_lexer_state {
1.1.1.4 ! root       33:     IN_ERROR = 0,
1.1       root       34:     IN_DQ_UCODE3,
                     35:     IN_DQ_UCODE2,
                     36:     IN_DQ_UCODE1,
                     37:     IN_DQ_UCODE0,
                     38:     IN_DQ_STRING_ESCAPE,
                     39:     IN_DQ_STRING,
                     40:     IN_SQ_UCODE3,
                     41:     IN_SQ_UCODE2,
                     42:     IN_SQ_UCODE1,
                     43:     IN_SQ_UCODE0,
                     44:     IN_SQ_STRING_ESCAPE,
                     45:     IN_SQ_STRING,
                     46:     IN_ZERO,
                     47:     IN_DIGITS,
                     48:     IN_DIGIT,
                     49:     IN_EXP_E,
                     50:     IN_MANTISSA,
                     51:     IN_MANTISSA_DIGITS,
                     52:     IN_NONZERO_NUMBER,
                     53:     IN_NEG_NONZERO_NUMBER,
                     54:     IN_KEYWORD,
                     55:     IN_ESCAPE,
                     56:     IN_ESCAPE_L,
                     57:     IN_ESCAPE_LL,
1.1.1.2   root       58:     IN_ESCAPE_I,
                     59:     IN_ESCAPE_I6,
                     60:     IN_ESCAPE_I64,
1.1       root       61:     IN_WHITESPACE,
                     62:     IN_START,
                     63: };
                     64: 
                     65: #define TERMINAL(state) [0 ... 0x7F] = (state)
                     66: 
/* True when the row of OLD_STATE was filled with TERMINAL(terminal), i.e.
   TERMINAL was reached by peeking at a byte that belongs to the next
   token rather than by consuming one.  Entry 0 of the row is used as
   the probe: the TERMINAL macro sets it and nothing else in the table
   assigns to byte 0 explicitly.  */
#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
            ((terminal) == json_lexer[(old_state)][0])
1.1       root       72: 
1.1.1.3   root       73: static const uint8_t json_lexer[][256] =  {
1.1       root       74:     /* double quote string */
                     75:     [IN_DQ_UCODE3] = {
                     76:         ['0' ... '9'] = IN_DQ_STRING,
                     77:         ['a' ... 'f'] = IN_DQ_STRING,
                     78:         ['A' ... 'F'] = IN_DQ_STRING,
                     79:     },
                     80:     [IN_DQ_UCODE2] = {
                     81:         ['0' ... '9'] = IN_DQ_UCODE3,
                     82:         ['a' ... 'f'] = IN_DQ_UCODE3,
                     83:         ['A' ... 'F'] = IN_DQ_UCODE3,
                     84:     },
                     85:     [IN_DQ_UCODE1] = {
                     86:         ['0' ... '9'] = IN_DQ_UCODE2,
                     87:         ['a' ... 'f'] = IN_DQ_UCODE2,
                     88:         ['A' ... 'F'] = IN_DQ_UCODE2,
                     89:     },
                     90:     [IN_DQ_UCODE0] = {
                     91:         ['0' ... '9'] = IN_DQ_UCODE1,
                     92:         ['a' ... 'f'] = IN_DQ_UCODE1,
                     93:         ['A' ... 'F'] = IN_DQ_UCODE1,
                     94:     },
                     95:     [IN_DQ_STRING_ESCAPE] = {
                     96:         ['b'] = IN_DQ_STRING,
                     97:         ['f'] =  IN_DQ_STRING,
                     98:         ['n'] =  IN_DQ_STRING,
                     99:         ['r'] =  IN_DQ_STRING,
                    100:         ['t'] =  IN_DQ_STRING,
1.1.1.3   root      101:         ['/'] = IN_DQ_STRING,
                    102:         ['\\'] = IN_DQ_STRING,
1.1       root      103:         ['\''] = IN_DQ_STRING,
                    104:         ['\"'] = IN_DQ_STRING,
                    105:         ['u'] = IN_DQ_UCODE0,
                    106:     },
                    107:     [IN_DQ_STRING] = {
1.1.1.4 ! root      108:         [1 ... 0xBF] = IN_DQ_STRING,
        !           109:         [0xC2 ... 0xF4] = IN_DQ_STRING,
1.1       root      110:         ['\\'] = IN_DQ_STRING_ESCAPE,
1.1.1.3   root      111:         ['"'] = JSON_STRING,
1.1       root      112:     },
                    113: 
                    114:     /* single quote string */
                    115:     [IN_SQ_UCODE3] = {
                    116:         ['0' ... '9'] = IN_SQ_STRING,
                    117:         ['a' ... 'f'] = IN_SQ_STRING,
                    118:         ['A' ... 'F'] = IN_SQ_STRING,
                    119:     },
                    120:     [IN_SQ_UCODE2] = {
                    121:         ['0' ... '9'] = IN_SQ_UCODE3,
                    122:         ['a' ... 'f'] = IN_SQ_UCODE3,
                    123:         ['A' ... 'F'] = IN_SQ_UCODE3,
                    124:     },
                    125:     [IN_SQ_UCODE1] = {
                    126:         ['0' ... '9'] = IN_SQ_UCODE2,
                    127:         ['a' ... 'f'] = IN_SQ_UCODE2,
                    128:         ['A' ... 'F'] = IN_SQ_UCODE2,
                    129:     },
                    130:     [IN_SQ_UCODE0] = {
                    131:         ['0' ... '9'] = IN_SQ_UCODE1,
                    132:         ['a' ... 'f'] = IN_SQ_UCODE1,
                    133:         ['A' ... 'F'] = IN_SQ_UCODE1,
                    134:     },
                    135:     [IN_SQ_STRING_ESCAPE] = {
                    136:         ['b'] = IN_SQ_STRING,
                    137:         ['f'] =  IN_SQ_STRING,
                    138:         ['n'] =  IN_SQ_STRING,
                    139:         ['r'] =  IN_SQ_STRING,
                    140:         ['t'] =  IN_SQ_STRING,
1.1.1.3   root      141:         ['/'] = IN_DQ_STRING,
                    142:         ['\\'] = IN_DQ_STRING,
1.1       root      143:         ['\''] = IN_SQ_STRING,
                    144:         ['\"'] = IN_SQ_STRING,
                    145:         ['u'] = IN_SQ_UCODE0,
                    146:     },
                    147:     [IN_SQ_STRING] = {
1.1.1.4 ! root      148:         [1 ... 0xBF] = IN_SQ_STRING,
        !           149:         [0xC2 ... 0xF4] = IN_SQ_STRING,
1.1       root      150:         ['\\'] = IN_SQ_STRING_ESCAPE,
1.1.1.3   root      151:         ['\''] = JSON_STRING,
1.1       root      152:     },
                    153: 
                    154:     /* Zero */
                    155:     [IN_ZERO] = {
                    156:         TERMINAL(JSON_INTEGER),
1.1.1.4 ! root      157:         ['0' ... '9'] = IN_ERROR,
1.1       root      158:         ['.'] = IN_MANTISSA,
                    159:     },
                    160: 
                    161:     /* Float */
                    162:     [IN_DIGITS] = {
                    163:         TERMINAL(JSON_FLOAT),
                    164:         ['0' ... '9'] = IN_DIGITS,
                    165:     },
                    166: 
                    167:     [IN_DIGIT] = {
                    168:         ['0' ... '9'] = IN_DIGITS,
                    169:     },
                    170: 
                    171:     [IN_EXP_E] = {
                    172:         ['-'] = IN_DIGIT,
                    173:         ['+'] = IN_DIGIT,
                    174:         ['0' ... '9'] = IN_DIGITS,
                    175:     },
                    176: 
                    177:     [IN_MANTISSA_DIGITS] = {
                    178:         TERMINAL(JSON_FLOAT),
                    179:         ['0' ... '9'] = IN_MANTISSA_DIGITS,
                    180:         ['e'] = IN_EXP_E,
                    181:         ['E'] = IN_EXP_E,
                    182:     },
                    183: 
                    184:     [IN_MANTISSA] = {
                    185:         ['0' ... '9'] = IN_MANTISSA_DIGITS,
                    186:     },
                    187: 
                    188:     /* Number */
                    189:     [IN_NONZERO_NUMBER] = {
                    190:         TERMINAL(JSON_INTEGER),
                    191:         ['0' ... '9'] = IN_NONZERO_NUMBER,
                    192:         ['e'] = IN_EXP_E,
                    193:         ['E'] = IN_EXP_E,
                    194:         ['.'] = IN_MANTISSA,
                    195:     },
                    196: 
                    197:     [IN_NEG_NONZERO_NUMBER] = {
                    198:         ['0'] = IN_ZERO,
                    199:         ['1' ... '9'] = IN_NONZERO_NUMBER,
                    200:     },
                    201: 
                    202:     /* keywords */
                    203:     [IN_KEYWORD] = {
                    204:         TERMINAL(JSON_KEYWORD),
                    205:         ['a' ... 'z'] = IN_KEYWORD,
                    206:     },
                    207: 
                    208:     /* whitespace */
                    209:     [IN_WHITESPACE] = {
                    210:         TERMINAL(JSON_SKIP),
                    211:         [' '] = IN_WHITESPACE,
                    212:         ['\t'] = IN_WHITESPACE,
                    213:         ['\r'] = IN_WHITESPACE,
                    214:         ['\n'] = IN_WHITESPACE,
                    215:     },        
                    216: 
                    217:     /* escape */
                    218:     [IN_ESCAPE_LL] = {
1.1.1.3   root      219:         ['d'] = JSON_ESCAPE,
1.1       root      220:     },
                    221: 
                    222:     [IN_ESCAPE_L] = {
1.1.1.3   root      223:         ['d'] = JSON_ESCAPE,
1.1       root      224:         ['l'] = IN_ESCAPE_LL,
                    225:     },
                    226: 
1.1.1.2   root      227:     [IN_ESCAPE_I64] = {
1.1.1.3   root      228:         ['d'] = JSON_ESCAPE,
1.1.1.2   root      229:     },
                    230: 
                    231:     [IN_ESCAPE_I6] = {
                    232:         ['4'] = IN_ESCAPE_I64,
                    233:     },
                    234: 
                    235:     [IN_ESCAPE_I] = {
                    236:         ['6'] = IN_ESCAPE_I6,
                    237:     },
                    238: 
1.1       root      239:     [IN_ESCAPE] = {
1.1.1.3   root      240:         ['d'] = JSON_ESCAPE,
                    241:         ['i'] = JSON_ESCAPE,
                    242:         ['p'] = JSON_ESCAPE,
                    243:         ['s'] = JSON_ESCAPE,
                    244:         ['f'] = JSON_ESCAPE,
1.1       root      245:         ['l'] = IN_ESCAPE_L,
1.1.1.2   root      246:         ['I'] = IN_ESCAPE_I,
1.1       root      247:     },
                    248: 
                    249:     /* top level rule */
                    250:     [IN_START] = {
                    251:         ['"'] = IN_DQ_STRING,
                    252:         ['\''] = IN_SQ_STRING,
                    253:         ['0'] = IN_ZERO,
                    254:         ['1' ... '9'] = IN_NONZERO_NUMBER,
                    255:         ['-'] = IN_NEG_NONZERO_NUMBER,
1.1.1.3   root      256:         ['{'] = JSON_OPERATOR,
                    257:         ['}'] = JSON_OPERATOR,
                    258:         ['['] = JSON_OPERATOR,
                    259:         [']'] = JSON_OPERATOR,
                    260:         [','] = JSON_OPERATOR,
                    261:         [':'] = JSON_OPERATOR,
1.1       root      262:         ['a' ... 'z'] = IN_KEYWORD,
                    263:         ['%'] = IN_ESCAPE,
                    264:         [' '] = IN_WHITESPACE,
                    265:         ['\t'] = IN_WHITESPACE,
                    266:         ['\r'] = IN_WHITESPACE,
                    267:         ['\n'] = IN_WHITESPACE,
                    268:     },
                    269: };
                    270: 
                    271: void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
                    272: {
                    273:     lexer->emit = func;
                    274:     lexer->state = IN_START;
                    275:     lexer->token = qstring_new();
1.1.1.3   root      276:     lexer->x = lexer->y = 0;
1.1       root      277: }
                    278: 
1.1.1.4 ! root      279: static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
1.1       root      280: {
1.1.1.3   root      281:     int char_consumed, new_state;
1.1       root      282: 
                    283:     lexer->x++;
                    284:     if (ch == '\n') {
                    285:         lexer->x = 0;
                    286:         lexer->y++;
                    287:     }
                    288: 
1.1.1.3   root      289:     do {
                    290:         new_state = json_lexer[lexer->state][(uint8_t)ch];
                    291:         char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
                    292:         if (char_consumed) {
                    293:             qstring_append_chr(lexer->token, ch);
                    294:         }
1.1       root      295: 
1.1.1.3   root      296:         switch (new_state) {
                    297:         case JSON_OPERATOR:
                    298:         case JSON_ESCAPE:
                    299:         case JSON_INTEGER:
                    300:         case JSON_FLOAT:
                    301:         case JSON_KEYWORD:
                    302:         case JSON_STRING:
                    303:             lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
                    304:         case JSON_SKIP:
                    305:             QDECREF(lexer->token);
                    306:             lexer->token = qstring_new();
                    307:             new_state = IN_START;
                    308:             break;
1.1.1.4 ! root      309:         case IN_ERROR:
        !           310:             /* XXX: To avoid having previous bad input leaving the parser in an
        !           311:              * unresponsive state where we consume unpredictable amounts of
        !           312:              * subsequent "good" input, percolate this error state up to the
        !           313:              * tokenizer/parser by forcing a NULL object to be emitted, then
        !           314:              * reset state.
        !           315:              *
        !           316:              * Also note that this handling is required for reliable channel
        !           317:              * negotiation between QMP and the guest agent, since chr(0xFF)
        !           318:              * is placed at the beginning of certain events to ensure proper
        !           319:              * delivery when the channel is in an unknown state. chr(0xFF) is
        !           320:              * never a valid ASCII/UTF-8 sequence, so this should reliably
        !           321:              * induce an error/flush state.
        !           322:              */
        !           323:             lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
        !           324:             QDECREF(lexer->token);
        !           325:             lexer->token = qstring_new();
        !           326:             new_state = IN_START;
        !           327:             lexer->state = new_state;
        !           328:             return 0;
1.1.1.3   root      329:         default:
                    330:             break;
                    331:         }
                    332:         lexer->state = new_state;
1.1.1.4 ! root      333:     } while (!char_consumed && !flush);
        !           334: 
        !           335:     /* Do not let a single token grow to an arbitrarily large size,
        !           336:      * this is a security consideration.
        !           337:      */
        !           338:     if (lexer->token->length > MAX_TOKEN_SIZE) {
        !           339:         lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
        !           340:         QDECREF(lexer->token);
        !           341:         lexer->token = qstring_new();
        !           342:         lexer->state = IN_START;
        !           343:     }
        !           344: 
1.1       root      345:     return 0;
                    346: }
                    347: 
                    348: int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
                    349: {
                    350:     size_t i;
                    351: 
                    352:     for (i = 0; i < size; i++) {
                    353:         int err;
                    354: 
1.1.1.4 ! root      355:         err = json_lexer_feed_char(lexer, buffer[i], false);
1.1       root      356:         if (err < 0) {
                    357:             return err;
                    358:         }
                    359:     }
                    360: 
                    361:     return 0;
                    362: }
                    363: 
                    364: int json_lexer_flush(JSONLexer *lexer)
                    365: {
1.1.1.4 ! root      366:     return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true);
1.1       root      367: }
                    368: 
                    369: void json_lexer_destroy(JSONLexer *lexer)
                    370: {
                    371:     QDECREF(lexer->token);
                    372: }

unix.superglobalmegacorp.com