|
|
1.1 ! root 1: /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- ! 2: * ! 3: * ***** BEGIN LICENSE BLOCK ***** ! 4: * Version: MPL 1.1/GPL 2.0/LGPL 2.1 ! 5: * ! 6: * The contents of this file are subject to the Mozilla Public License Version ! 7: * 1.1 (the "License"); you may not use this file except in compliance with ! 8: * the License. You may obtain a copy of the License at ! 9: * http://www.mozilla.org/MPL/ ! 10: * ! 11: * Software distributed under the License is distributed on an "AS IS" basis, ! 12: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License ! 13: * for the specific language governing rights and limitations under the ! 14: * License. ! 15: * ! 16: * The Original Code is Mozilla Communicator client code, released ! 17: * March 31, 1998. ! 18: * ! 19: * The Initial Developer of the Original Code is ! 20: * Netscape Communications Corporation. ! 21: * Portions created by the Initial Developer are Copyright (C) 1998 ! 22: * the Initial Developer. All Rights Reserved. ! 23: * ! 24: * Contributor(s): ! 25: * ! 26: * Alternatively, the contents of this file may be used under the terms of ! 27: * either of the GNU General Public License Version 2 or later (the "GPL"), ! 28: * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), ! 29: * in which case the provisions of the GPL or the LGPL are applicable instead ! 30: * of those above. If you wish to allow use of your version of this file only ! 31: * under the terms of either the GPL or the LGPL, and not to allow others to ! 32: * use your version of this file under the terms of the MPL, indicate your ! 33: * decision by deleting the provisions above and replace them with the notice ! 34: * and other provisions required by the GPL or the LGPL. If you do not delete ! 35: * the provisions above, a recipient may use your version of this file under ! 36: * the terms of any one of the MPL, the GPL or the LGPL. ! 37: * ! 38: * ***** END LICENSE BLOCK ***** */ ! 39: ! 40: #ifndef jsstr_h___ ! 41: #define jsstr_h___ ! 42: /* ! 43: * JS string type implementation. ! 44: * ! 45: * A JS string is a counted array of unicode characters. To support handoff ! 46: * of API client memory, the chars are allocated separately from the length, ! 47: * necessitating a pointer after the count, to form a separately allocated ! 48: * string descriptor. String descriptors are GC'ed, while their chars are ! 49: * allocated from the malloc heap. ! 50: * ! 51: * When a string is treated as an object (by following it with . or []), the ! 52: * runtime wraps it with a JSObject whose valueOf method returns the unwrapped ! 53: * string descriptor. ! 54: */ ! 55: #include <ctype.h> ! 56: #include "jspubtd.h" ! 57: #include "jsprvtd.h" ! 58: #include "jshash.h" ! 59: ! 60: JS_BEGIN_EXTERN_C ! 61: ! 62: /* ! 63: * The original GC-thing "string" type, a flat character string owned by its ! 64: * GC-thing descriptor. The chars member points to a vector having byte size ! 65: * (length + 1) * sizeof(jschar), terminated at index length by a zero jschar. ! 66: * The terminator is purely a backstop, in case the chars pointer flows out to ! 67: * native code that requires \u0000 termination. ! 68: * ! 69: * NB: Always use the JSSTRING_LENGTH and JSSTRING_CHARS accessor macros, ! 70: * unless you guard str->member uses with !JSSTRING_IS_DEPENDENT(str). ! 71: */ ! 72: struct JSString { ! 73: size_t length; ! 74: jschar *chars; ! 75: }; ! 76: ! 77: /* ! 78: * Overlay structure for a string that depends on another string's characters. ! 79: * Distinguished by the JSSTRFLAG_DEPENDENT bit being set in length. The base ! 80: * member may point to another dependent string if JSSTRING_CHARS has not been ! 81: * called yet. The length chars in a dependent string are stored starting at ! 82: * base->chars + start, and are not necessarily zero-terminated. If start is ! 83: * 0, it is not stored, length is a full size_t (minus the JSSTRFLAG_* bits in ! 84: * the high two positions), and the JSSTRFLAG_PREFIX flag is set. ! 85: */ ! 86: struct JSDependentString { ! 87: size_t length; ! 88: JSString *base; ! 89: }; ! 90: ! 91: /* Definitions for flags stored in the high order bits of JSString.length. */ ! 92: #define JSSTRFLAG_BITS 2 ! 93: #define JSSTRFLAG_SHIFT(flg) ((size_t)(flg) << JSSTRING_LENGTH_BITS) ! 94: #define JSSTRFLAG_MASK JSSTRFLAG_SHIFT(JS_BITMASK(JSSTRFLAG_BITS)) ! 95: #define JSSTRFLAG_DEPENDENT JSSTRFLAG_SHIFT(1) ! 96: #define JSSTRFLAG_PREFIX JSSTRFLAG_SHIFT(2) ! 97: ! 98: /* Universal JSString type inquiry and accessor macros. */ ! 99: #define JSSTRING_BIT(n) ((size_t)1 << (n)) ! 100: #define JSSTRING_BITMASK(n) (JSSTRING_BIT(n) - 1) ! 101: #define JSSTRING_HAS_FLAG(str,flg) ((str)->length & (flg)) ! 102: #define JSSTRING_IS_DEPENDENT(str) JSSTRING_HAS_FLAG(str, JSSTRFLAG_DEPENDENT) ! 103: #define JSSTRING_IS_PREFIX(str) JSSTRING_HAS_FLAG(str, JSSTRFLAG_PREFIX) ! 104: #define JSSTRING_CHARS(str) (JSSTRING_IS_DEPENDENT(str) \ ! 105: ? JSSTRDEP_CHARS(str) \ ! 106: : (str)->chars) ! 107: #define JSSTRING_LENGTH(str) (JSSTRING_IS_DEPENDENT(str) \ ! 108: ? JSSTRDEP_LENGTH(str) \ ! 109: : (str)->length) ! 110: #define JSSTRING_LENGTH_BITS (sizeof(size_t) * JS_BITS_PER_BYTE \ ! 111: - JSSTRFLAG_BITS) ! 112: #define JSSTRING_LENGTH_MASK JSSTRING_BITMASK(JSSTRING_LENGTH_BITS) ! 113: ! 114: /* Specific JSDependentString shift/mask accessor and mutator macros. */ ! 115: #define JSSTRDEP_START_BITS (JSSTRING_LENGTH_BITS-JSSTRDEP_LENGTH_BITS) ! 116: #define JSSTRDEP_START_SHIFT JSSTRDEP_LENGTH_BITS ! 117: #define JSSTRDEP_START_MASK JSSTRING_BITMASK(JSSTRDEP_START_BITS) ! 118: #define JSSTRDEP_LENGTH_BITS (JSSTRING_LENGTH_BITS / 2) ! 119: #define JSSTRDEP_LENGTH_MASK JSSTRING_BITMASK(JSSTRDEP_LENGTH_BITS) ! 120: ! 121: #define JSSTRDEP(str) ((JSDependentString *)(str)) ! 122: #define JSSTRDEP_START(str) (JSSTRING_IS_PREFIX(str) ? 0 \ ! 123: : ((JSSTRDEP(str)->length \ ! 124: >> JSSTRDEP_START_SHIFT) \ ! 125: & JSSTRDEP_START_MASK)) ! 126: #define JSSTRDEP_LENGTH(str) (JSSTRDEP(str)->length \ ! 127: & (JSSTRING_IS_PREFIX(str) \ ! 128: ? JSSTRING_LENGTH_MASK \ ! 129: : JSSTRDEP_LENGTH_MASK)) ! 130: ! 131: #define JSSTRDEP_SET_START_AND_LENGTH(str,off,len) \ ! 132: (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT \ ! 133: | ((off) << JSSTRDEP_START_SHIFT) \ ! 134: | (len)) ! 135: #define JSPREFIX_SET_LENGTH(str,len) \ ! 136: (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX | (len)) ! 137: ! 138: #define JSSTRDEP_BASE(str) (JSSTRDEP(str)->base) ! 139: #define JSSTRDEP_SET_BASE(str,bstr) (JSSTRDEP(str)->base = (bstr)) ! 140: #define JSPREFIX_BASE(str) JSSTRDEP_BASE(str) ! 141: #define JSPREFIX_SET_BASE(str,bstr) JSSTRDEP_SET_BASE(str,bstr) ! 142: ! 143: #define JSSTRDEP_CHARS(str) \ ! 144: (JSSTRING_IS_DEPENDENT(JSSTRDEP_BASE(str)) \ ! 145: ? js_GetDependentStringChars(str) \ ! 146: : JSSTRDEP_BASE(str)->chars + JSSTRDEP_START(str)) ! 147: ! 148: extern size_t ! 149: js_MinimizeDependentStrings(JSString *str, int level, JSString **basep); ! 150: ! 151: extern jschar * ! 152: js_GetDependentStringChars(JSString *str); ! 153: ! 154: extern jschar * ! 155: js_GetStringChars(JSString *str); ! 156: ! 157: extern JSString * ! 158: js_ConcatStrings(JSContext *cx, JSString *left, JSString *right); ! 159: ! 160: extern const jschar * ! 161: js_UndependString(JSContext *cx, JSString *str); ! 162: ! 163: struct JSSubString { ! 164: size_t length; ! 165: const jschar *chars; ! 166: }; ! 167: ! 168: extern jschar js_empty_ucstr[]; ! 169: extern JSSubString js_EmptySubString; ! 170: ! 171: /* Unicode character attribute lookup tables. */ ! 172: extern const uint8 js_X[]; ! 173: extern const uint8 js_Y[]; ! 174: extern const uint32 js_A[]; ! 175: ! 176: /* Enumerated Unicode general category types. */ ! 177: typedef enum JSCharType { ! 178: JSCT_UNASSIGNED = 0, ! 179: JSCT_UPPERCASE_LETTER = 1, ! 180: JSCT_LOWERCASE_LETTER = 2, ! 181: JSCT_TITLECASE_LETTER = 3, ! 182: JSCT_MODIFIER_LETTER = 4, ! 183: JSCT_OTHER_LETTER = 5, ! 184: JSCT_NON_SPACING_MARK = 6, ! 185: JSCT_ENCLOSING_MARK = 7, ! 186: JSCT_COMBINING_SPACING_MARK = 8, ! 187: JSCT_DECIMAL_DIGIT_NUMBER = 9, ! 188: JSCT_LETTER_NUMBER = 10, ! 189: JSCT_OTHER_NUMBER = 11, ! 190: JSCT_SPACE_SEPARATOR = 12, ! 191: JSCT_LINE_SEPARATOR = 13, ! 192: JSCT_PARAGRAPH_SEPARATOR = 14, ! 193: JSCT_CONTROL = 15, ! 194: JSCT_FORMAT = 16, ! 195: JSCT_PRIVATE_USE = 18, ! 196: JSCT_SURROGATE = 19, ! 197: JSCT_DASH_PUNCTUATION = 20, ! 198: JSCT_START_PUNCTUATION = 21, ! 199: JSCT_END_PUNCTUATION = 22, ! 200: JSCT_CONNECTOR_PUNCTUATION = 23, ! 201: JSCT_OTHER_PUNCTUATION = 24, ! 202: JSCT_MATH_SYMBOL = 25, ! 203: JSCT_CURRENCY_SYMBOL = 26, ! 204: JSCT_MODIFIER_SYMBOL = 27, ! 205: JSCT_OTHER_SYMBOL = 28 ! 206: } JSCharType; ! 207: ! 208: /* Character classifying and mapping macros, based on java.lang.Character. */ ! 209: #define JS_CCODE(c) (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]]) ! 210: #define JS_CTYPE(c) (JS_CCODE(c) & 0x1F) ! 211: ! 212: #define JS_ISALPHA(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ ! 213: (1 << JSCT_LOWERCASE_LETTER) | \ ! 214: (1 << JSCT_TITLECASE_LETTER) | \ ! 215: (1 << JSCT_MODIFIER_LETTER) | \ ! 216: (1 << JSCT_OTHER_LETTER)) \ ! 217: >> JS_CTYPE(c)) & 1) ! 218: ! 219: #define JS_ISALNUM(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ ! 220: (1 << JSCT_LOWERCASE_LETTER) | \ ! 221: (1 << JSCT_TITLECASE_LETTER) | \ ! 222: (1 << JSCT_MODIFIER_LETTER) | \ ! 223: (1 << JSCT_OTHER_LETTER) | \ ! 224: (1 << JSCT_DECIMAL_DIGIT_NUMBER)) \ ! 225: >> JS_CTYPE(c)) & 1) ! 226: ! 227: /* A unicode letter, suitable for use in an identifier. */ ! 228: #define JS_ISUC_LETTER(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ ! 229: (1 << JSCT_LOWERCASE_LETTER) | \ ! 230: (1 << JSCT_TITLECASE_LETTER) | \ ! 231: (1 << JSCT_MODIFIER_LETTER) | \ ! 232: (1 << JSCT_OTHER_LETTER) | \ ! 233: (1 << JSCT_LETTER_NUMBER)) \ ! 234: >> JS_CTYPE(c)) & 1) ! 235: ! 236: /* ! 237: * 'IdentifierPart' from ECMA grammar, is Unicode letter or combining mark or ! 238: * digit or connector punctuation. ! 239: */ ! 240: #define JS_ISID_PART(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ ! 241: (1 << JSCT_LOWERCASE_LETTER) | \ ! 242: (1 << JSCT_TITLECASE_LETTER) | \ ! 243: (1 << JSCT_MODIFIER_LETTER) | \ ! 244: (1 << JSCT_OTHER_LETTER) | \ ! 245: (1 << JSCT_LETTER_NUMBER) | \ ! 246: (1 << JSCT_NON_SPACING_MARK) | \ ! 247: (1 << JSCT_COMBINING_SPACING_MARK) | \ ! 248: (1 << JSCT_DECIMAL_DIGIT_NUMBER) | \ ! 249: (1 << JSCT_CONNECTOR_PUNCTUATION)) \ ! 250: >> JS_CTYPE(c)) & 1) ! 251: ! 252: /* Unicode control-format characters, ignored in input */ ! 253: #define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1) ! 254: ! 255: /* ! 256: * Per ECMA-262 15.10.2.6, these characters are the only ones that make up a ! 257: * "word", as far as a RegExp is concerned. If we want a Unicode-friendlier ! 258: * definition of "word", we should rename this macro to something regexp-y. ! 259: */ ! 260: #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_')) ! 261: ! 262: /* XXXbe unify on A/X/Y tbls, avoid ctype.h? */ ! 263: #define JS_ISIDENT_START(c) (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$') ! 264: #define JS_ISIDENT(c) (JS_ISID_PART(c) || (c) == '_' || (c) == '$') ! 265: ! 266: #define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER) ! 267: ! 268: /* XXXbe fs, etc. ? */ ! 269: #define JS_ISSPACE(c) ((JS_CCODE(c) & 0x00070000) == 0x00040000) ! 270: #define JS_ISPRINT(c) ((c) < 128 && isprint(c)) ! 271: ! 272: #define JS_ISUPPER(c) (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER) ! 273: #define JS_ISLOWER(c) (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER) ! 274: ! 275: #define JS_TOUPPER(c) ((jschar) ((JS_CCODE(c) & 0x00100000) \ ! 276: ? (c) - ((int32)JS_CCODE(c) >> 22) \ ! 277: : (c))) ! 278: #define JS_TOLOWER(c) ((jschar) ((JS_CCODE(c) & 0x00200000) \ ! 279: ? (c) + ((int32)JS_CCODE(c) >> 22) \ ! 280: : (c))) ! 281: ! 282: #define JS_TOCTRL(c) ((c) ^ 64) /* XXX unsafe! requires uppercase c */ ! 283: ! 284: /* Shorthands for ASCII (7-bit) decimal and hex conversion. */ ! 285: #define JS7_ISDEC(c) ((c) < 128 && isdigit(c)) ! 286: #define JS7_UNDEC(c) ((c) - '0') ! 287: #define JS7_ISHEX(c) ((c) < 128 && isxdigit(c)) ! 288: #define JS7_UNHEX(c) (uintN)(isdigit(c) ? (c) - '0' : 10 + tolower(c) - 'a') ! 289: #define JS7_ISLET(c) ((c) < 128 && isalpha(c)) ! 290: ! 291: /* Initialize truly global state associated with JS strings. */ ! 292: extern JSBool ! 293: js_InitStringGlobals(void); ! 294: ! 295: extern void ! 296: js_FreeStringGlobals(void); ! 297: ! 298: extern void ! 299: js_PurgeDeflatedStringCache(JSString *str); ! 300: ! 301: /* Initialize per-runtime string state for the first context in the runtime. */ ! 302: extern JSBool ! 303: js_InitRuntimeStringState(JSContext *cx); ! 304: ! 305: extern void ! 306: js_FinishRuntimeStringState(JSContext *cx); ! 307: ! 308: /* Initialize the String class, returning its prototype object. */ ! 309: extern JSObject * ! 310: js_InitStringClass(JSContext *cx, JSObject *obj); ! 311: ! 312: extern const char js_escape_str[]; ! 313: extern const char js_unescape_str[]; ! 314: extern const char js_uneval_str[]; ! 315: extern const char js_decodeURI_str[]; ! 316: extern const char js_encodeURI_str[]; ! 317: extern const char js_decodeURIComponent_str[]; ! 318: extern const char js_encodeURIComponent_str[]; ! 319: ! 320: /* GC-allocate a string descriptor for the given malloc-allocated chars. */ ! 321: extern JSString * ! 322: js_NewString(JSContext *cx, jschar *chars, size_t length, uintN gcflag); ! 323: ! 324: extern JSString * ! 325: js_NewDependentString(JSContext *cx, JSString *base, size_t start, ! 326: size_t length, uintN gcflag); ! 327: ! 328: /* Copy a counted string and GC-allocate a descriptor for it. */ ! 329: extern JSString * ! 330: js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n, uintN gcflag); ! 331: ! 332: /* Copy a C string and GC-allocate a descriptor for it. */ ! 333: extern JSString * ! 334: js_NewStringCopyZ(JSContext *cx, const jschar *s, uintN gcflag); ! 335: ! 336: /* Free the chars held by str when it is finalized by the GC. */ ! 337: extern void ! 338: js_FinalizeString(JSContext *cx, JSString *str); ! 339: ! 340: extern void ! 341: js_FinalizeStringRT(JSRuntime *rt, JSString *str); ! 342: ! 343: /* Wrap a string value in a String object. */ ! 344: extern JSObject * ! 345: js_StringToObject(JSContext *cx, JSString *str); ! 346: ! 347: /* ! 348: * Convert a value to a string, returning null after reporting an error, ! 349: * otherwise returning a new string reference. ! 350: */ ! 351: extern JSString * ! 352: js_ValueToString(JSContext *cx, jsval v); ! 353: ! 354: /* ! 355: * Convert a value to its source expression, returning null after reporting ! 356: * an error, otherwise returning a new string reference. ! 357: */ ! 358: extern JSString * ! 359: js_ValueToSource(JSContext *cx, jsval v); ! 360: ! 361: #ifdef HT_ENUMERATE_NEXT /* XXX don't require jshash.h */ ! 362: /* ! 363: * Compute a hash function from str. ! 364: */ ! 365: extern JSHashNumber ! 366: js_HashString(JSString *str); ! 367: #endif ! 368: ! 369: /* ! 370: * Return less than, equal to, or greater than zero depending on whether ! 371: * str1 is less than, equal to, or greater than str2. ! 372: */ ! 373: extern intN ! 374: js_CompareStrings(JSString *str1, JSString *str2); ! 375: ! 376: /* ! 377: * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen. ! 378: * The patlen argument must be positive and no greater than BMH_PATLEN_MAX. ! 379: * The start argument tells where in text to begin the search. ! 380: * ! 381: * Return the index of pat in text, or -1 if not found. ! 382: */ ! 383: #define BMH_CHARSET_SIZE 256 /* ISO-Latin-1 */ ! 384: #define BMH_PATLEN_MAX 255 /* skip table element is uint8 */ ! 385: ! 386: #define BMH_BAD_PATTERN (-2) /* return value if pat is not ISO-Latin-1 */ ! 387: ! 388: extern jsint ! 389: js_BoyerMooreHorspool(const jschar *text, jsint textlen, ! 390: const jschar *pat, jsint patlen, ! 391: jsint start); ! 392: ! 393: extern size_t ! 394: js_strlen(const jschar *s); ! 395: ! 396: extern jschar * ! 397: js_strchr(const jschar *s, jschar c); ! 398: ! 399: extern jschar * ! 400: js_strchr_limit(const jschar *s, jschar c, const jschar *limit); ! 401: ! 402: #define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar)) ! 403: ! 404: /* ! 405: * Return s advanced past any Unicode white space characters. ! 406: */ ! 407: extern const jschar * ! 408: js_SkipWhiteSpace(const jschar *s); ! 409: ! 410: /* ! 411: * Inflate bytes to JS chars and vice versa. Report out of memory via cx ! 412: * and return null on error, otherwise return the jschar or byte vector that ! 413: * was JS_malloc'ed. ! 414: */ ! 415: extern jschar * ! 416: js_InflateString(JSContext *cx, const char *bytes, size_t length); ! 417: ! 418: extern char * ! 419: js_DeflateString(JSContext *cx, const jschar *chars, size_t length); ! 420: ! 421: /* ! 422: * Inflate bytes to JS chars into a buffer. ! 423: * 'chars' must be large enough for 'length'+1 jschars. ! 424: */ ! 425: extern void ! 426: js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length); ! 427: ! 428: /* ! 429: * Associate bytes with str in the deflated string cache, returning true on ! 430: * successful association, false on out of memory. ! 431: */ ! 432: extern JSBool ! 433: js_SetStringBytes(JSString *str, char *bytes, size_t length); ! 434: ! 435: /* ! 436: * Find or create a deflated string cache entry for str that contains its ! 437: * characters chopped from Unicode code points into bytes. ! 438: */ ! 439: extern char * ! 440: js_GetStringBytes(JSString *str); ! 441: ! 442: JSBool ! 443: js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, ! 444: jsval *rval); ! 445: ! 446: JS_END_EXTERN_C ! 447: ! 448: #endif /* jsstr_h___ */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.