|
|
1.1 ! root 1: /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- ! 2: * ! 3: * The contents of this file are subject to the Netscape Public ! 4: * License Version 1.1 (the "License"); you may not use this file ! 5: * except in compliance with the License. You may obtain a copy of ! 6: * the License at http://www.mozilla.org/NPL/ ! 7: * ! 8: * Software distributed under the License is distributed on an "AS ! 9: * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or ! 10: * implied. See the License for the specific language governing ! 11: * rights and limitations under the License. ! 12: * ! 13: * The Original Code is Mozilla Communicator client code, released ! 14: * March 31, 1998. ! 15: * ! 16: * The Initial Developer of the Original Code is Netscape ! 17: * Communications Corporation. Portions created by Netscape are ! 18: * Copyright (C) 1998 Netscape Communications Corporation. All ! 19: * Rights Reserved. ! 20: * ! 21: * Contributor(s): ! 22: * ! 23: * Alternatively, the contents of this file may be used under the ! 24: * terms of the GNU Public License (the "GPL"), in which case the ! 25: * provisions of the GPL are applicable instead of those above. ! 26: * If you wish to allow use of your version of this file only ! 27: * under the terms of the GPL and not to allow others to use your ! 28: * version of this file under the NPL, indicate your decision by ! 29: * deleting the provisions above and replace them with the notice ! 30: * and other provisions required by the GPL. If you do not delete ! 31: * the provisions above, a recipient may use your version of this ! 32: * file under either the NPL or the GPL. ! 33: */ ! 34: ! 35: #ifndef jsstr_h___ ! 36: #define jsstr_h___ ! 37: /* ! 38: * JS string type implementation. ! 39: * ! 40: * A JS string is a counted array of unicode characters. To support handoff ! 41: * of API client memory, the chars are allocated separately from the length, ! 42: * necessitating a pointer after the count, to form a separately allocated ! 43: * string descriptor. String descriptors are GC'ed, while their chars are ! 44: * allocated from the malloc heap. ! 45: * ! 46: * When a string is treated as an object (by following it with . or []), the ! 47: * runtime wraps it with a JSObject whose valueOf method returns the unwrapped ! 48: * string descriptor. ! 49: */ ! 50: #include <ctype.h> ! 51: #include "jspubtd.h" ! 52: #include "jsprvtd.h" ! 53: #include "jshash.h" ! 54: ! 55: JS_BEGIN_EXTERN_C ! 56: ! 57: /* ! 58: * The original GC-thing "string" type, a flat character string owned by its ! 59: * GC-thing descriptor. The chars member points to a vector having byte size ! 60: * (length + 1) * sizeof(jschar), terminated at index length by a zero jschar. ! 61: * The terminator is purely a backstop, in case the chars pointer flows out to ! 62: * native code that requires \u0000 termination. ! 63: * ! 64: * NB: Always use the JSSTRING_LENGTH and JSSTRING_CHARS accessor macros, ! 65: * unless you guard str->member uses with !JSSTRING_IS_DEPENDENT(str). ! 66: */ ! 67: struct JSString { ! 68: size_t length; ! 69: jschar *chars; ! 70: }; ! 71: ! 72: /* ! 73: * Overlay structure for a string that depends on another string's characters. ! 74: * Distinguished by the JSSTRFLAG_DEPENDENT bit being set in length. The base ! 75: * member may point to another dependent string if JSSTRING_CHARS has not been ! 76: * called yet. The length chars in a dependent string are stored starting at ! 77: * base->chars + start, and are not necessarily zero-terminated. If start is ! 78: * 0, it is not stored, length is a full size_t (minus the JSSTRFLAG_* bits in ! 79: * the high two positions), and the JSSTRFLAG_PREFIX flag is set. ! 80: */ ! 81: struct JSDependentString { ! 82: size_t length; ! 83: JSString *base; ! 84: }; ! 85: ! 86: /* Definitions for flags stored in the high order bits of JSString.length. */ ! 87: #define JSSTRFLAG_BITS 2 ! 88: #define JSSTRFLAG_SHIFT(flg) ((size_t)(flg) << JSSTRING_LENGTH_BITS) ! 89: #define JSSTRFLAG_MASK JSSTRFLAG_SHIFT(JS_BITMASK(JSSTRFLAG_BITS)) ! 90: #define JSSTRFLAG_DEPENDENT JSSTRFLAG_SHIFT(1) ! 91: #define JSSTRFLAG_PREFIX JSSTRFLAG_SHIFT(2) ! 92: ! 93: /* Universal JSString type inquiry and accessor macros. */ ! 94: #define JSSTRING_BIT(n) ((size_t)1 << (n)) ! 95: #define JSSTRING_BITMASK(n) (JSSTRING_BIT(n) - 1) ! 96: #define JSSTRING_HAS_FLAG(str,flg) ((str)->length & (flg)) ! 97: #define JSSTRING_IS_DEPENDENT(str) JSSTRING_HAS_FLAG(str, JSSTRFLAG_DEPENDENT) ! 98: #define JSSTRING_IS_PREFIX(str) JSSTRING_HAS_FLAG(str, JSSTRFLAG_PREFIX) ! 99: #define JSSTRING_CHARS(str) (JSSTRING_IS_DEPENDENT(str) \ ! 100: ? JSSTRDEP_CHARS(str) \ ! 101: : (str)->chars) ! 102: #define JSSTRING_LENGTH(str) (JSSTRING_IS_DEPENDENT(str) \ ! 103: ? JSSTRDEP_LENGTH(str) \ ! 104: : (str)->length) ! 105: #define JSSTRING_LENGTH_BITS (sizeof(size_t) * JS_BITS_PER_BYTE \ ! 106: - JSSTRFLAG_BITS) ! 107: #define JSSTRING_LENGTH_MASK JSSTRING_BITMASK(JSSTRING_LENGTH_BITS) ! 108: ! 109: /* Specific JSDependentString shift/mask accessor and mutator macros. */ ! 110: #define JSSTRDEP_START_BITS (JSSTRING_LENGTH_BITS-JSSTRDEP_LENGTH_BITS) ! 111: #define JSSTRDEP_START_SHIFT JSSTRDEP_LENGTH_BITS ! 112: #define JSSTRDEP_START_MASK JSSTRING_BITMASK(JSSTRDEP_START_BITS) ! 113: #define JSSTRDEP_LENGTH_BITS (JSSTRING_LENGTH_BITS / 2) ! 114: #define JSSTRDEP_LENGTH_MASK JSSTRING_BITMASK(JSSTRDEP_LENGTH_BITS) ! 115: ! 116: #define JSSTRDEP(str) ((JSDependentString *)(str)) ! 117: #define JSSTRDEP_START(str) (JSSTRING_IS_PREFIX(str) ? 0 \ ! 118: : ((JSSTRDEP(str)->length \ ! 119: >> JSSTRDEP_START_SHIFT) \ ! 120: & JSSTRDEP_START_MASK)) ! 121: #define JSSTRDEP_LENGTH(str) (JSSTRDEP(str)->length \ ! 122: & (JSSTRING_IS_PREFIX(str) \ ! 123: ? JSSTRING_LENGTH_MASK \ ! 124: : JSSTRDEP_LENGTH_MASK)) ! 125: ! 126: #define JSSTRDEP_SET_START_AND_LENGTH(str,off,len) \ ! 127: (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT \ ! 128: | ((off) << JSSTRDEP_START_SHIFT) \ ! 129: | (len)) ! 130: #define JSPREFIX_SET_LENGTH(str,len) \ ! 131: (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX | (len)) ! 132: ! 133: #define JSSTRDEP_BASE(str) (JSSTRDEP(str)->base) ! 134: #define JSSTRDEP_SET_BASE(str,bstr) (JSSTRDEP(str)->base = (bstr)) ! 135: #define JSPREFIX_BASE(str) JSSTRDEP_BASE(str) ! 136: #define JSPREFIX_SET_BASE(str,bstr) JSSTRDEP_SET_BASE(str,bstr) ! 137: ! 138: #define JSSTRDEP_CHARS(str) \ ! 139: (JSSTRING_IS_DEPENDENT(JSSTRDEP_BASE(str)) \ ! 140: ? js_GetDependentStringChars(str) \ ! 141: : JSSTRDEP_BASE(str)->chars + JSSTRDEP_START(str)) ! 142: ! 143: extern size_t ! 144: js_MinimizeDependentStrings(JSString *str, int level, JSString **basep); ! 145: ! 146: extern jschar * ! 147: js_GetDependentStringChars(JSString *str); ! 148: ! 149: extern jschar * ! 150: js_GetStringChars(JSString *str); ! 151: ! 152: extern JSString * ! 153: js_ConcatStrings(JSContext *cx, JSString *left, JSString *right); ! 154: ! 155: extern const jschar * ! 156: js_UndependString(JSContext *cx, JSString *str); ! 157: ! 158: struct JSSubString { ! 159: size_t length; ! 160: const jschar *chars; ! 161: }; ! 162: ! 163: extern jschar js_empty_ucstr[]; ! 164: extern JSSubString js_EmptySubString; ! 165: ! 166: /* Unicode character attribute lookup tables. */ ! 167: extern const uint8 js_X[]; ! 168: extern const uint8 js_Y[]; ! 169: extern const uint32 js_A[]; ! 170: ! 171: /* Enumerated Unicode general category types. */ ! 172: typedef enum JSCharType { ! 173: JSCT_UNASSIGNED = 0, ! 174: JSCT_UPPERCASE_LETTER = 1, ! 175: JSCT_LOWERCASE_LETTER = 2, ! 176: JSCT_TITLECASE_LETTER = 3, ! 177: JSCT_MODIFIER_LETTER = 4, ! 178: JSCT_OTHER_LETTER = 5, ! 179: JSCT_NON_SPACING_MARK = 6, ! 180: JSCT_ENCLOSING_MARK = 7, ! 181: JSCT_COMBINING_SPACING_MARK = 8, ! 182: JSCT_DECIMAL_DIGIT_NUMBER = 9, ! 183: JSCT_LETTER_NUMBER = 10, ! 184: JSCT_OTHER_NUMBER = 11, ! 185: JSCT_SPACE_SEPARATOR = 12, ! 186: JSCT_LINE_SEPARATOR = 13, ! 187: JSCT_PARAGRAPH_SEPARATOR = 14, ! 188: JSCT_CONTROL = 15, ! 189: JSCT_FORMAT = 16, ! 190: JSCT_PRIVATE_USE = 18, ! 191: JSCT_SURROGATE = 19, ! 192: JSCT_DASH_PUNCTUATION = 20, ! 193: JSCT_START_PUNCTUATION = 21, ! 194: JSCT_END_PUNCTUATION = 22, ! 195: JSCT_CONNECTOR_PUNCTUATION = 23, ! 196: JSCT_OTHER_PUNCTUATION = 24, ! 197: JSCT_MATH_SYMBOL = 25, ! 198: JSCT_CURRENCY_SYMBOL = 26, ! 199: JSCT_MODIFIER_SYMBOL = 27, ! 200: JSCT_OTHER_SYMBOL = 28 ! 201: } JSCharType; ! 202: ! 203: /* Character classifying and mapping macros, based on java.lang.Character. */ ! 204: #define JS_CCODE(c) (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]]) ! 205: #define JS_CTYPE(c) (JS_CCODE(c) & 0x1F) ! 206: ! 207: #define JS_ISALPHA(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ ! 208: (1 << JSCT_LOWERCASE_LETTER) | \ ! 209: (1 << JSCT_TITLECASE_LETTER) | \ ! 210: (1 << JSCT_MODIFIER_LETTER) | \ ! 211: (1 << JSCT_OTHER_LETTER)) \ ! 212: >> JS_CTYPE(c)) & 1) ! 213: ! 214: #define JS_ISALNUM(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ ! 215: (1 << JSCT_LOWERCASE_LETTER) | \ ! 216: (1 << JSCT_TITLECASE_LETTER) | \ ! 217: (1 << JSCT_MODIFIER_LETTER) | \ ! 218: (1 << JSCT_OTHER_LETTER) | \ ! 219: (1 << JSCT_DECIMAL_DIGIT_NUMBER)) \ ! 220: >> JS_CTYPE(c)) & 1) ! 221: ! 222: /* A unicode letter, suitable for use in an identifier. */ ! 223: #define JS_ISUC_LETTER(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ ! 224: (1 << JSCT_LOWERCASE_LETTER) | \ ! 225: (1 << JSCT_TITLECASE_LETTER) | \ ! 226: (1 << JSCT_MODIFIER_LETTER) | \ ! 227: (1 << JSCT_OTHER_LETTER) | \ ! 228: (1 << JSCT_LETTER_NUMBER)) \ ! 229: >> JS_CTYPE(c)) & 1) ! 230: ! 231: /* ! 232: * 'IdentifierPart' from ECMA grammar, is Unicode letter or ! 233: * combining mark or digit or connector punctuation. ! 234: */ ! 235: #define JS_ISID_PART(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ ! 236: (1 << JSCT_LOWERCASE_LETTER) | \ ! 237: (1 << JSCT_TITLECASE_LETTER) | \ ! 238: (1 << JSCT_MODIFIER_LETTER) | \ ! 239: (1 << JSCT_OTHER_LETTER) | \ ! 240: (1 << JSCT_LETTER_NUMBER) | \ ! 241: (1 << JSCT_NON_SPACING_MARK) | \ ! 242: (1 << JSCT_COMBINING_SPACING_MARK) | \ ! 243: (1 << JSCT_DECIMAL_DIGIT_NUMBER) | \ ! 244: (1 << JSCT_CONNECTOR_PUNCTUATION)) \ ! 245: >> JS_CTYPE(c)) & 1) ! 246: ! 247: /* Unicode control-format characters, ignored in input */ ! 248: #define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1) ! 249: ! 250: #define JS_ISWORD(c) (JS_ISALNUM(c) || (c) == '_') ! 251: ! 252: /* XXXbe unify on A/X/Y tbls, avoid ctype.h? */ ! 253: #define JS_ISIDENT_START(c) (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$') ! 254: #define JS_ISIDENT(c) (JS_ISID_PART(c) || (c) == '_' || (c) == '$') ! 255: ! 256: #define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER) ! 257: ! 258: /* XXXbe fs, etc. ? */ ! 259: #define JS_ISSPACE(c) ((JS_CCODE(c) & 0x00070000) == 0x00040000) ! 260: #define JS_ISPRINT(c) ((c) < 128 && isprint(c)) ! 261: ! 262: #define JS_ISUPPER(c) (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER) ! 263: #define JS_ISLOWER(c) (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER) ! 264: ! 265: #define JS_TOUPPER(c) ((JS_CCODE(c) & 0x00100000) ? (c) - ((int32)JS_CCODE(c) >> 22) : (c)) ! 266: #define JS_TOLOWER(c) ((JS_CCODE(c) & 0x00200000) ? (c) + ((int32)JS_CCODE(c) >> 22) : (c)) ! 267: ! 268: #define JS_TOCTRL(c) ((c) ^ 64) /* XXX unsafe! requires uppercase c */ ! 269: ! 270: /* Shorthands for ASCII (7-bit) decimal and hex conversion. */ ! 271: #define JS7_ISDEC(c) ((c) < 128 && isdigit(c)) ! 272: #define JS7_UNDEC(c) ((c) - '0') ! 273: #define JS7_ISHEX(c) ((c) < 128 && isxdigit(c)) ! 274: #define JS7_UNHEX(c) (uintN)(isdigit(c) ? (c) - '0' : 10 + tolower(c) - 'a') ! 275: #define JS7_ISLET(c) ((c) < 128 && isalpha(c)) ! 276: ! 277: /* Initialize truly global state associated with JS strings. */ ! 278: extern JSBool ! 279: js_InitStringGlobals(void); ! 280: ! 281: extern void ! 282: js_FreeStringGlobals(void); ! 283: ! 284: extern void ! 285: js_PurgeDeflatedStringCache(JSString *str); ! 286: ! 287: /* Initialize per-runtime string state for the first context in the runtime. */ ! 288: extern JSBool ! 289: js_InitRuntimeStringState(JSContext *cx); ! 290: ! 291: extern void ! 292: js_FinishRuntimeStringState(JSContext *cx); ! 293: ! 294: /* Initialize the String class, returning its prototype object. */ ! 295: extern JSObject * ! 296: js_InitStringClass(JSContext *cx, JSObject *obj); ! 297: ! 298: extern const char js_escape_str[]; ! 299: extern const char js_unescape_str[]; ! 300: extern const char js_uneval_str[]; ! 301: extern const char js_decodeURI_str[]; ! 302: extern const char js_encodeURI_str[]; ! 303: extern const char js_decodeURIComponent_str[]; ! 304: extern const char js_encodeURIComponent_str[]; ! 305: ! 306: /* GC-allocate a string descriptor for the given malloc-allocated chars. */ ! 307: extern JSString * ! 308: js_NewString(JSContext *cx, jschar *chars, size_t length, uintN gcflag); ! 309: ! 310: extern JSString * ! 311: js_NewDependentString(JSContext *cx, JSString *base, size_t start, ! 312: size_t length, uintN gcflag); ! 313: ! 314: /* Copy a counted string and GC-allocate a descriptor for it. */ ! 315: extern JSString * ! 316: js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n, uintN gcflag); ! 317: ! 318: /* Copy a C string and GC-allocate a descriptor for it. */ ! 319: extern JSString * ! 320: js_NewStringCopyZ(JSContext *cx, const jschar *s, uintN gcflag); ! 321: ! 322: /* Free the chars held by str when it is finalized by the GC. */ ! 323: extern void ! 324: js_FinalizeString(JSContext *cx, JSString *str); ! 325: ! 326: extern void ! 327: js_FinalizeStringRT(JSRuntime *rt, JSString *str); ! 328: ! 329: /* Wrap a string value in a String object. */ ! 330: extern JSObject * ! 331: js_StringToObject(JSContext *cx, JSString *str); ! 332: ! 333: /* ! 334: * Convert a value to a string, returning null after reporting an error, ! 335: * otherwise returning a new string reference. ! 336: */ ! 337: extern JSString * ! 338: js_ValueToString(JSContext *cx, jsval v); ! 339: ! 340: /* ! 341: * Convert a value to its source expression, returning null after reporting ! 342: * an error, otherwise returning a new string reference. ! 343: */ ! 344: extern JSString * ! 345: js_ValueToSource(JSContext *cx, jsval v); ! 346: ! 347: #ifdef HT_ENUMERATE_NEXT /* XXX don't require jshash.h */ ! 348: /* ! 349: * Compute a hash function from str. ! 350: */ ! 351: extern JSHashNumber ! 352: js_HashString(JSString *str); ! 353: #endif ! 354: ! 355: /* ! 356: * Return less than, equal to, or greater than zero depending on whether ! 357: * str1 is less than, equal to, or greater than str2. ! 358: */ ! 359: extern intN ! 360: js_CompareStrings(JSString *str1, JSString *str2); ! 361: ! 362: /* ! 363: * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen. ! 364: * The patlen argument must be positive and no greater than BMH_PATLEN_MAX. ! 365: * The start argument tells where in text to begin the search. ! 366: * ! 367: * Return the index of pat in text, or -1 if not found. ! 368: */ ! 369: #define BMH_CHARSET_SIZE 256 /* ISO-Latin-1 */ ! 370: #define BMH_PATLEN_MAX 255 /* skip table element is uint8 */ ! 371: ! 372: #define BMH_BAD_PATTERN (-2) /* return value if pat is not ISO-Latin-1 */ ! 373: ! 374: extern jsint ! 375: js_BoyerMooreHorspool(const jschar *text, jsint textlen, ! 376: const jschar *pat, jsint patlen, ! 377: jsint start); ! 378: ! 379: extern size_t ! 380: js_strlen(const jschar *s); ! 381: ! 382: extern jschar * ! 383: js_strchr(const jschar *s, jschar c); ! 384: ! 385: extern jschar * ! 386: js_strchr_limit(const jschar *s, jschar c, const jschar *limit); ! 387: ! 388: #define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar)) ! 389: ! 390: /* ! 391: * Return s advanced past any Unicode white space characters. ! 392: */ ! 393: extern const jschar * ! 394: js_SkipWhiteSpace(const jschar *s); ! 395: ! 396: /* ! 397: * Inflate bytes to JS chars and vice versa. Report out of memory via cx ! 398: * and return null on error, otherwise return the jschar or byte vector that ! 399: * was JS_malloc'ed. ! 400: */ ! 401: extern jschar * ! 402: js_InflateString(JSContext *cx, const char *bytes, size_t length); ! 403: ! 404: extern char * ! 405: js_DeflateString(JSContext *cx, const jschar *chars, size_t length); ! 406: ! 407: /* ! 408: * Inflate bytes to JS chars into a buffer. ! 409: * 'chars' must be large enough for 'length'+1 jschars. ! 410: */ ! 411: extern void ! 412: js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length); ! 413: ! 414: /* ! 415: * Associate bytes with str in the deflated string cache, returning true on ! 416: * successful association, false on out of memory. ! 417: */ ! 418: extern JSBool ! 419: js_SetStringBytes(JSString *str, char *bytes, size_t length); ! 420: ! 421: /* ! 422: * Find or create a deflated string cache entry for str that contains its ! 423: * characters chopped from Unicode code points into bytes. ! 424: */ ! 425: extern char * ! 426: js_GetStringBytes(JSString *str); ! 427: ! 428: JSBool ! 429: js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, ! 430: jsval *rval); ! 431: ! 432: JS_END_EXTERN_C ! 433: ! 434: #endif /* jsstr_h___ */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.