Annotation of sbbs/javascript/include/mozilla/js/jsstr.h, revision 1.1.1.1

1.1       root        1: /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
                      2:  *
                      3:  * The contents of this file are subject to the Netscape Public
                      4:  * License Version 1.1 (the "License"); you may not use this file
                      5:  * except in compliance with the License. You may obtain a copy of
                      6:  * the License at http://www.mozilla.org/NPL/
                      7:  *
                      8:  * Software distributed under the License is distributed on an "AS
                      9:  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
                     10:  * implied. See the License for the specific language governing
                     11:  * rights and limitations under the License.
                     12:  *
                     13:  * The Original Code is Mozilla Communicator client code, released
                     14:  * March 31, 1998.
                     15:  *
                     16:  * The Initial Developer of the Original Code is Netscape
                     17:  * Communications Corporation.  Portions created by Netscape are
                     18:  * Copyright (C) 1998 Netscape Communications Corporation. All
                     19:  * Rights Reserved.
                     20:  *
                     21:  * Contributor(s): 
                     22:  *
                     23:  * Alternatively, the contents of this file may be used under the
                     24:  * terms of the GNU Public License (the "GPL"), in which case the
                     25:  * provisions of the GPL are applicable instead of those above.
                     26:  * If you wish to allow use of your version of this file only
                     27:  * under the terms of the GPL and not to allow others to use your
                     28:  * version of this file under the NPL, indicate your decision by
                     29:  * deleting the provisions above and replace them with the notice
                     30:  * and other provisions required by the GPL.  If you do not delete
                     31:  * the provisions above, a recipient may use your version of this
                     32:  * file under either the NPL or the GPL.
                     33:  */
                     34: 
                     35: #ifndef jsstr_h___
                     36: #define jsstr_h___
                     37: /*
                     38:  * JS string type implementation.
                     39:  *
                     40:  * A JS string is a counted array of unicode characters.  To support handoff
                     41:  * of API client memory, the chars are allocated separately from the length,
                     42:  * necessitating a pointer after the count, to form a separately allocated
                     43:  * string descriptor.  String descriptors are GC'ed, while their chars are
                     44:  * allocated from the malloc heap.
                     45:  *
                     46:  * When a string is treated as an object (by following it with . or []), the
                     47:  * runtime wraps it with a JSObject whose valueOf method returns the unwrapped
                     48:  * string descriptor.
                     49:  */
                     50: #include <ctype.h>
                     51: #include "jspubtd.h"
                     52: #include "jsprvtd.h"
                     53: #include "jshash.h"
                     54: 
                     55: JS_BEGIN_EXTERN_C
                     56: 
                     57: /*
                     58:  * The original GC-thing "string" type, a flat character string owned by its
                     59:  * GC-thing descriptor.  The chars member points to a vector having byte size
                     60:  * (length + 1) * sizeof(jschar), terminated at index length by a zero jschar.
                     61:  * The terminator is purely a backstop, in case the chars pointer flows out to
                     62:  * native code that requires \u0000 termination.
                     63:  *
                     64:  * NB: Always use the JSSTRING_LENGTH and JSSTRING_CHARS accessor macros,
                     65:  * unless you guard str->member uses with !JSSTRING_IS_DEPENDENT(str).
                     66:  */
                     67: struct JSString {
                     68:     size_t          length;
                     69:     jschar          *chars;
                     70: };
                     71: 
                     72: /*
                     73:  * Overlay structure for a string that depends on another string's characters.
                     74:  * Distinguished by the JSSTRFLAG_DEPENDENT bit being set in length.  The base
                     75:  * member may point to another dependent string if JSSTRING_CHARS has not been
                     76:  * called yet.  The length chars in a dependent string are stored starting at
                     77:  * base->chars + start, and are not necessarily zero-terminated.  If start is
                     78:  * 0, it is not stored, length is a full size_t (minus the JSSTRFLAG_* bits in
                     79:  * the high two positions), and the JSSTRFLAG_PREFIX flag is set.
                     80:  */
                     81: struct JSDependentString {
                     82:     size_t          length;
                     83:     JSString        *base;
                     84: };
                     85: 
                     86: /* Definitions for flags stored in the high order bits of JSString.length. */
                     87: #define JSSTRFLAG_BITS              2
                     88: #define JSSTRFLAG_SHIFT(flg)        ((size_t)(flg) << JSSTRING_LENGTH_BITS)
                     89: #define JSSTRFLAG_MASK              JSSTRFLAG_SHIFT(JS_BITMASK(JSSTRFLAG_BITS))
                     90: #define JSSTRFLAG_DEPENDENT         JSSTRFLAG_SHIFT(1)
                     91: #define JSSTRFLAG_PREFIX            JSSTRFLAG_SHIFT(2)
                     92: 
                     93: /* Universal JSString type inquiry and accessor macros. */
                     94: #define JSSTRING_BIT(n)             ((size_t)1 << (n))
                     95: #define JSSTRING_BITMASK(n)         (JSSTRING_BIT(n) - 1)
                     96: #define JSSTRING_HAS_FLAG(str,flg)  ((str)->length & (flg))
                     97: #define JSSTRING_IS_DEPENDENT(str)  JSSTRING_HAS_FLAG(str, JSSTRFLAG_DEPENDENT)
                     98: #define JSSTRING_IS_PREFIX(str)     JSSTRING_HAS_FLAG(str, JSSTRFLAG_PREFIX)
                     99: #define JSSTRING_CHARS(str)         (JSSTRING_IS_DEPENDENT(str)               \
                    100:                                      ? JSSTRDEP_CHARS(str)                    \
                    101:                                      : (str)->chars)
                    102: #define JSSTRING_LENGTH(str)        (JSSTRING_IS_DEPENDENT(str)               \
                    103:                                      ? JSSTRDEP_LENGTH(str)                   \
                    104:                                      : (str)->length)
                    105: #define JSSTRING_LENGTH_BITS        (sizeof(size_t) * JS_BITS_PER_BYTE        \
                    106:                                      - JSSTRFLAG_BITS)
                    107: #define JSSTRING_LENGTH_MASK        JSSTRING_BITMASK(JSSTRING_LENGTH_BITS)
                    108: 
                    109: /* Specific JSDependentString shift/mask accessor and mutator macros. */
                    110: #define JSSTRDEP_START_BITS         (JSSTRING_LENGTH_BITS-JSSTRDEP_LENGTH_BITS)
                    111: #define JSSTRDEP_START_SHIFT        JSSTRDEP_LENGTH_BITS
                    112: #define JSSTRDEP_START_MASK         JSSTRING_BITMASK(JSSTRDEP_START_BITS)
                    113: #define JSSTRDEP_LENGTH_BITS        (JSSTRING_LENGTH_BITS / 2)
                    114: #define JSSTRDEP_LENGTH_MASK        JSSTRING_BITMASK(JSSTRDEP_LENGTH_BITS)
                    115: 
                    116: #define JSSTRDEP(str)               ((JSDependentString *)(str))
                    117: #define JSSTRDEP_START(str)         (JSSTRING_IS_PREFIX(str) ? 0              \
                    118:                                      : ((JSSTRDEP(str)->length                \
                    119:                                          >> JSSTRDEP_START_SHIFT)             \
                    120:                                         & JSSTRDEP_START_MASK))
                    121: #define JSSTRDEP_LENGTH(str)        (JSSTRDEP(str)->length                    \
                    122:                                      & (JSSTRING_IS_PREFIX(str)               \
                    123:                                         ? JSSTRING_LENGTH_MASK                \
                    124:                                         : JSSTRDEP_LENGTH_MASK))
                    125: 
                    126: #define JSSTRDEP_SET_START_AND_LENGTH(str,off,len)                            \
                    127:     (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT                              \
                    128:                            | ((off) << JSSTRDEP_START_SHIFT)                  \
                    129:                            | (len))
                    130: #define JSPREFIX_SET_LENGTH(str,len)                                          \
                    131:     (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX | (len))
                    132: 
                    133: #define JSSTRDEP_BASE(str)          (JSSTRDEP(str)->base)
                    134: #define JSSTRDEP_SET_BASE(str,bstr) (JSSTRDEP(str)->base = (bstr))
                    135: #define JSPREFIX_BASE(str)          JSSTRDEP_BASE(str)
                    136: #define JSPREFIX_SET_BASE(str,bstr) JSSTRDEP_SET_BASE(str,bstr)
                    137: 
                    138: #define JSSTRDEP_CHARS(str)                                                   \
                    139:     (JSSTRING_IS_DEPENDENT(JSSTRDEP_BASE(str))                                \
                    140:      ? js_GetDependentStringChars(str)                                        \
                    141:      : JSSTRDEP_BASE(str)->chars + JSSTRDEP_START(str))
                    142: 
                    143: extern size_t
                    144: js_MinimizeDependentStrings(JSString *str, int level, JSString **basep);
                    145: 
                    146: extern jschar *
                    147: js_GetDependentStringChars(JSString *str);
                    148: 
                    149: extern jschar *
                    150: js_GetStringChars(JSString *str);
                    151: 
                    152: extern JSString *
                    153: js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
                    154: 
                    155: extern const jschar *
                    156: js_UndependString(JSContext *cx, JSString *str);
                    157: 
                    158: struct JSSubString {
                    159:     size_t          length;
                    160:     const jschar    *chars;
                    161: };
                    162: 
                    163: extern jschar      js_empty_ucstr[];
                    164: extern JSSubString js_EmptySubString;
                    165: 
                    166: /* Unicode character attribute lookup tables. */
                    167: extern const uint8 js_X[];
                    168: extern const uint8 js_Y[];
                    169: extern const uint32 js_A[];
                    170: 
                    171: /* Enumerated Unicode general category types. */
                    172: typedef enum JSCharType {
                    173:     JSCT_UNASSIGNED             = 0,
                    174:     JSCT_UPPERCASE_LETTER       = 1,
                    175:     JSCT_LOWERCASE_LETTER       = 2,
                    176:     JSCT_TITLECASE_LETTER       = 3,
                    177:     JSCT_MODIFIER_LETTER        = 4,
                    178:     JSCT_OTHER_LETTER           = 5,
                    179:     JSCT_NON_SPACING_MARK       = 6,
                    180:     JSCT_ENCLOSING_MARK         = 7,
                    181:     JSCT_COMBINING_SPACING_MARK = 8,
                    182:     JSCT_DECIMAL_DIGIT_NUMBER   = 9,
                    183:     JSCT_LETTER_NUMBER          = 10,
                    184:     JSCT_OTHER_NUMBER           = 11,
                    185:     JSCT_SPACE_SEPARATOR        = 12,
                    186:     JSCT_LINE_SEPARATOR         = 13,
                    187:     JSCT_PARAGRAPH_SEPARATOR    = 14,
                    188:     JSCT_CONTROL                = 15,
                    189:     JSCT_FORMAT                 = 16,
                    190:     JSCT_PRIVATE_USE            = 18,
                    191:     JSCT_SURROGATE              = 19,
                    192:     JSCT_DASH_PUNCTUATION       = 20,
                    193:     JSCT_START_PUNCTUATION      = 21,
                    194:     JSCT_END_PUNCTUATION        = 22,
                    195:     JSCT_CONNECTOR_PUNCTUATION  = 23,
                    196:     JSCT_OTHER_PUNCTUATION      = 24,
                    197:     JSCT_MATH_SYMBOL            = 25,
                    198:     JSCT_CURRENCY_SYMBOL        = 26,
                    199:     JSCT_MODIFIER_SYMBOL        = 27,
                    200:     JSCT_OTHER_SYMBOL           = 28
                    201: } JSCharType;
                    202: 
                    203: /* Character classifying and mapping macros, based on java.lang.Character. */
                    204: #define JS_CCODE(c)     (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]])
                    205: #define JS_CTYPE(c)     (JS_CCODE(c) & 0x1F)
                    206: 
                    207: #define JS_ISALPHA(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                    208:                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                    209:                           (1 << JSCT_TITLECASE_LETTER) |                     \
                    210:                           (1 << JSCT_MODIFIER_LETTER) |                      \
                    211:                           (1 << JSCT_OTHER_LETTER))                          \
                    212:                          >> JS_CTYPE(c)) & 1)
                    213: 
                    214: #define JS_ISALNUM(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                    215:                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                    216:                           (1 << JSCT_TITLECASE_LETTER) |                     \
                    217:                           (1 << JSCT_MODIFIER_LETTER) |                      \
                    218:                           (1 << JSCT_OTHER_LETTER) |                         \
                    219:                           (1 << JSCT_DECIMAL_DIGIT_NUMBER))                  \
                    220:                          >> JS_CTYPE(c)) & 1)
                    221: 
                    222: /* A unicode letter, suitable for use in an identifier. */
                    223: #define JS_ISUC_LETTER(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                 \
                    224:                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                    225:                           (1 << JSCT_TITLECASE_LETTER) |                     \
                    226:                           (1 << JSCT_MODIFIER_LETTER) |                      \
                    227:                           (1 << JSCT_OTHER_LETTER) |                         \
                    228:                           (1 << JSCT_LETTER_NUMBER))                         \
                    229:                          >> JS_CTYPE(c)) & 1)
                    230: 
                    231: /*
                    232: * 'IdentifierPart' from ECMA grammar, is Unicode letter or
                    233: * combining mark or digit or connector punctuation.
                    234: */
                    235: #define JS_ISID_PART(c) ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                    236:                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                    237:                           (1 << JSCT_TITLECASE_LETTER) |                     \
                    238:                           (1 << JSCT_MODIFIER_LETTER) |                      \
                    239:                           (1 << JSCT_OTHER_LETTER) |                         \
                    240:                           (1 << JSCT_LETTER_NUMBER) |                        \
                    241:                           (1 << JSCT_NON_SPACING_MARK) |                     \
                    242:                           (1 << JSCT_COMBINING_SPACING_MARK) |               \
                    243:                           (1 << JSCT_DECIMAL_DIGIT_NUMBER) |                 \
                    244:                           (1 << JSCT_CONNECTOR_PUNCTUATION))                 \
                    245:                          >> JS_CTYPE(c)) & 1)
                    246: 
                    247: /* Unicode control-format characters, ignored in input */
                    248: #define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
                    249: 
                    250: #define JS_ISWORD(c)    (JS_ISALNUM(c) || (c) == '_')
                    251: 
                    252: /* XXXbe unify on A/X/Y tbls, avoid ctype.h? */
                    253: #define JS_ISIDENT_START(c) (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$')
                    254: #define JS_ISIDENT(c)       (JS_ISID_PART(c) || (c) == '_' || (c) == '$')
                    255: 
                    256: #define JS_ISDIGIT(c)   (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
                    257: 
                    258: /* XXXbe fs, etc. ? */
                    259: #define JS_ISSPACE(c)   ((JS_CCODE(c) & 0x00070000) == 0x00040000)
                    260: #define JS_ISPRINT(c)   ((c) < 128 && isprint(c))
                    261: 
                    262: #define JS_ISUPPER(c)   (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
                    263: #define JS_ISLOWER(c)   (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)
                    264: 
                    265: #define JS_TOUPPER(c)   ((JS_CCODE(c) & 0x00100000) ? (c) - ((int32)JS_CCODE(c) >> 22) : (c))
                    266: #define JS_TOLOWER(c)   ((JS_CCODE(c) & 0x00200000) ? (c) + ((int32)JS_CCODE(c) >> 22) : (c))
                    267: 
                    268: #define JS_TOCTRL(c)    ((c) ^ 64)      /* XXX unsafe! requires uppercase c */
                    269: 
                    270: /* Shorthands for ASCII (7-bit) decimal and hex conversion. */
                    271: #define JS7_ISDEC(c)    ((c) < 128 && isdigit(c))
                    272: #define JS7_UNDEC(c)    ((c) - '0')
                    273: #define JS7_ISHEX(c)    ((c) < 128 && isxdigit(c))
                    274: #define JS7_UNHEX(c)    (uintN)(isdigit(c) ? (c) - '0' : 10 + tolower(c) - 'a')
                    275: #define JS7_ISLET(c)    ((c) < 128 && isalpha(c))
                    276: 
                    277: /* Initialize truly global state associated with JS strings. */
                    278: extern JSBool
                    279: js_InitStringGlobals(void);
                    280: 
                    281: extern void
                    282: js_FreeStringGlobals(void);
                    283: 
                    284: extern void
                    285: js_PurgeDeflatedStringCache(JSString *str);
                    286: 
                    287: /* Initialize per-runtime string state for the first context in the runtime. */
                    288: extern JSBool
                    289: js_InitRuntimeStringState(JSContext *cx);
                    290: 
                    291: extern void
                    292: js_FinishRuntimeStringState(JSContext *cx);
                    293: 
                    294: /* Initialize the String class, returning its prototype object. */
                    295: extern JSObject *
                    296: js_InitStringClass(JSContext *cx, JSObject *obj);
                    297: 
                    298: extern const char js_escape_str[];
                    299: extern const char js_unescape_str[];
                    300: extern const char js_uneval_str[];
                    301: extern const char js_decodeURI_str[];
                    302: extern const char js_encodeURI_str[];
                    303: extern const char js_decodeURIComponent_str[];
                    304: extern const char js_encodeURIComponent_str[];
                    305: 
                    306: /* GC-allocate a string descriptor for the given malloc-allocated chars. */
                    307: extern JSString *
                    308: js_NewString(JSContext *cx, jschar *chars, size_t length, uintN gcflag);
                    309: 
                    310: extern JSString *
                    311: js_NewDependentString(JSContext *cx, JSString *base, size_t start,
                    312:                       size_t length, uintN gcflag);
                    313: 
                    314: /* Copy a counted string and GC-allocate a descriptor for it. */
                    315: extern JSString *
                    316: js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n, uintN gcflag);
                    317: 
                    318: /* Copy a C string and GC-allocate a descriptor for it. */
                    319: extern JSString *
                    320: js_NewStringCopyZ(JSContext *cx, const jschar *s, uintN gcflag);
                    321: 
                    322: /* Free the chars held by str when it is finalized by the GC. */
                    323: extern void
                    324: js_FinalizeString(JSContext *cx, JSString *str);
                    325: 
                    326: extern void
                    327: js_FinalizeStringRT(JSRuntime *rt, JSString *str);
                    328: 
                    329: /* Wrap a string value in a String object. */
                    330: extern JSObject *
                    331: js_StringToObject(JSContext *cx, JSString *str);
                    332: 
                    333: /*
                    334:  * Convert a value to a string, returning null after reporting an error,
                    335:  * otherwise returning a new string reference.
                    336:  */
                    337: extern JSString *
                    338: js_ValueToString(JSContext *cx, jsval v);
                    339: 
                    340: /*
                    341:  * Convert a value to its source expression, returning null after reporting
                    342:  * an error, otherwise returning a new string reference.
                    343:  */
                    344: extern JSString *
                    345: js_ValueToSource(JSContext *cx, jsval v);
                    346: 
                    347: #ifdef HT_ENUMERATE_NEXT       /* XXX don't require jshash.h */
                    348: /*
                    349:  * Compute a hash function from str.
                    350:  */
                    351: extern JSHashNumber
                    352: js_HashString(JSString *str);
                    353: #endif
                    354: 
                    355: /*
                    356:  * Return less than, equal to, or greater than zero depending on whether
                    357:  * str1 is less than, equal to, or greater than str2.
                    358:  */
                    359: extern intN
                    360: js_CompareStrings(JSString *str1, JSString *str2);
                    361: 
                    362: /*
                    363:  * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
                    364:  * The patlen argument must be positive and no greater than BMH_PATLEN_MAX.
                    365:  * The start argument tells where in text to begin the search.
                    366:  *
                    367:  * Return the index of pat in text, or -1 if not found.
                    368:  */
                    369: #define BMH_CHARSET_SIZE 256    /* ISO-Latin-1 */
                    370: #define BMH_PATLEN_MAX   255    /* skip table element is uint8 */
                    371: 
                    372: #define BMH_BAD_PATTERN  (-2)   /* return value if pat is not ISO-Latin-1 */
                    373: 
                    374: extern jsint
                    375: js_BoyerMooreHorspool(const jschar *text, jsint textlen,
                    376:                       const jschar *pat, jsint patlen,
                    377:                       jsint start);
                    378: 
                    379: extern size_t
                    380: js_strlen(const jschar *s);
                    381: 
                    382: extern jschar *
                    383: js_strchr(const jschar *s, jschar c);
                    384: 
                    385: extern jschar *
                    386: js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
                    387: 
                    388: #define js_strncpy(t, s, n)     memcpy((t), (s), (n) * sizeof(jschar))
                    389: 
                    390: /*
                    391:  * Return s advanced past any Unicode white space characters.
                    392:  */
                    393: extern const jschar *
                    394: js_SkipWhiteSpace(const jschar *s);
                    395: 
                    396: /*
                    397:  * Inflate bytes to JS chars and vice versa.  Report out of memory via cx
                    398:  * and return null on error, otherwise return the jschar or byte vector that
                    399:  * was JS_malloc'ed.
                    400:  */
                    401: extern jschar *
                    402: js_InflateString(JSContext *cx, const char *bytes, size_t length);
                    403: 
                    404: extern char *
                    405: js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
                    406: 
                    407: /*
                    408:  * Inflate bytes to JS chars into a buffer.
                    409:  * 'chars' must be large enough for 'length'+1 jschars.
                    410:  */
                    411: extern void
                    412: js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length);
                    413: 
                    414: /*
                    415:  * Associate bytes with str in the deflated string cache, returning true on
                    416:  * successful association, false on out of memory.
                    417:  */
                    418: extern JSBool
                    419: js_SetStringBytes(JSString *str, char *bytes, size_t length);
                    420: 
                    421: /*
                    422:  * Find or create a deflated string cache entry for str that contains its
                    423:  * characters chopped from Unicode code points into bytes.
                    424:  */
                    425: extern char *
                    426: js_GetStringBytes(JSString *str);
                    427: 
                    428: JSBool
                    429: js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
                    430:               jsval *rval);
                    431: 
                    432: JS_END_EXTERN_C
                    433: 
                    434: #endif /* jsstr_h___ */

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.