|
|
1.1 root 1: /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2: *
3: * The contents of this file are subject to the Netscape Public
4: * License Version 1.1 (the "License"); you may not use this file
5: * except in compliance with the License. You may obtain a copy of
6: * the License at http://www.mozilla.org/NPL/
7: *
8: * Software distributed under the License is distributed on an "AS
9: * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
10: * implied. See the License for the specific language governing
11: * rights and limitations under the License.
12: *
13: * The Original Code is Mozilla Communicator client code, released
14: * March 31, 1998.
15: *
16: * The Initial Developer of the Original Code is Netscape
17: * Communications Corporation. Portions created by Netscape are
18: * Copyright (C) 1998 Netscape Communications Corporation. All
19: * Rights Reserved.
20: *
21: * Contributor(s):
22: *
23: * Alternatively, the contents of this file may be used under the
24: * terms of the GNU Public License (the "GPL"), in which case the
25: * provisions of the GPL are applicable instead of those above.
26: * If you wish to allow use of your version of this file only
27: * under the terms of the GPL and not to allow others to use your
28: * version of this file under the NPL, indicate your decision by
29: * deleting the provisions above and replace them with the notice
30: * and other provisions required by the GPL. If you do not delete
31: * the provisions above, a recipient may use your version of this
32: * file under either the NPL or the GPL.
33: */
34:
35: #ifndef jsstr_h___
36: #define jsstr_h___
37: /*
38: * JS string type implementation.
39: *
40: * A JS string is a counted array of unicode characters. To support handoff
41: * of API client memory, the chars are allocated separately from the length,
42: * necessitating a pointer after the count, to form a separately allocated
43: * string descriptor. String descriptors are GC'ed, while their chars are
44: * allocated from the malloc heap.
45: *
46: * When a string is treated as an object (by following it with . or []), the
47: * runtime wraps it with a JSObject whose valueOf method returns the unwrapped
48: * string descriptor.
49: */
50: #include <ctype.h>
51: #include "jspubtd.h"
52: #include "jsprvtd.h"
53: #include "jshash.h"
54:
55: JS_BEGIN_EXTERN_C
56:
57: /*
58: * The original GC-thing "string" type, a flat character string owned by its
59: * GC-thing descriptor. The chars member points to a vector having byte size
60: * (length + 1) * sizeof(jschar), terminated at index length by a zero jschar.
61: * The terminator is purely a backstop, in case the chars pointer flows out to
62: * native code that requires \u0000 termination.
63: *
64: * NB: Always use the JSSTRING_LENGTH and JSSTRING_CHARS accessor macros,
65: * unless you guard str->member uses with !JSSTRING_IS_DEPENDENT(str).
66: */
67: struct JSString {
68: size_t length;
69: jschar *chars;
70: };
71:
72: /*
73: * Overlay structure for a string that depends on another string's characters.
74: * Distinguished by the JSSTRFLAG_DEPENDENT bit being set in length. The base
75: * member may point to another dependent string if JSSTRING_CHARS has not been
76: * called yet. The length chars in a dependent string are stored starting at
77: * base->chars + start, and are not necessarily zero-terminated. If start is
78: * 0, it is not stored, length is a full size_t (minus the JSSTRFLAG_* bits in
79: * the high two positions), and the JSSTRFLAG_PREFIX flag is set.
80: */
81: struct JSDependentString {
82: size_t length;
83: JSString *base;
84: };
85:
/*
 * Definitions for flags stored in the high order bits of JSString.length.
 * The top JSSTRFLAG_BITS bits of the size_t hold the flags; the remaining
 * JSSTRING_LENGTH_BITS bits hold the length (and, for a non-prefix dependent
 * string, the start offset as well).
 */
#define JSSTRFLAG_BITS              2
#define JSSTRFLAG_SHIFT(flg)        ((size_t)(flg) << JSSTRING_LENGTH_BITS)
#define JSSTRFLAG_MASK              JSSTRFLAG_SHIFT(JS_BITMASK(JSSTRFLAG_BITS))
#define JSSTRFLAG_DEPENDENT         JSSTRFLAG_SHIFT(1)
#define JSSTRFLAG_PREFIX            JSSTRFLAG_SHIFT(2)

/*
 * Universal JSString type inquiry and accessor macros.  JSSTRING_CHARS and
 * JSSTRING_LENGTH work for both flat and dependent strings; each evaluates
 * its argument more than once, so pass an expression without side effects.
 */
#define JSSTRING_BIT(n)             ((size_t)1 << (n))
#define JSSTRING_BITMASK(n)         (JSSTRING_BIT(n) - 1)
#define JSSTRING_HAS_FLAG(str,flg)  ((str)->length & (flg))
#define JSSTRING_IS_DEPENDENT(str)  JSSTRING_HAS_FLAG(str, JSSTRFLAG_DEPENDENT)
#define JSSTRING_IS_PREFIX(str)     JSSTRING_HAS_FLAG(str, JSSTRFLAG_PREFIX)
#define JSSTRING_CHARS(str)         (JSSTRING_IS_DEPENDENT(str)               \
                                     ? JSSTRDEP_CHARS(str)                    \
                                     : (str)->chars)
#define JSSTRING_LENGTH(str)        (JSSTRING_IS_DEPENDENT(str)               \
                                     ? JSSTRDEP_LENGTH(str)                   \
                                     : (str)->length)
#define JSSTRING_LENGTH_BITS        (sizeof(size_t) * JS_BITS_PER_BYTE        \
                                     - JSSTRFLAG_BITS)
#define JSSTRING_LENGTH_MASK        JSSTRING_BITMASK(JSSTRING_LENGTH_BITS)

/*
 * Specific JSDependentString shift/mask accessor and mutator macros.
 *
 * For a non-prefix dependent string the non-flag bits of length are split in
 * two: the low JSSTRDEP_LENGTH_BITS bits hold the length and the bits above
 * them (JSSTRDEP_START_BITS wide) hold the start offset into the base.
 */
#define JSSTRDEP_START_BITS         (JSSTRING_LENGTH_BITS-JSSTRDEP_LENGTH_BITS)
#define JSSTRDEP_START_SHIFT        JSSTRDEP_LENGTH_BITS
#define JSSTRDEP_START_MASK         JSSTRING_BITMASK(JSSTRDEP_START_BITS)
#define JSSTRDEP_LENGTH_BITS        (JSSTRING_LENGTH_BITS / 2)
#define JSSTRDEP_LENGTH_MASK        JSSTRING_BITMASK(JSSTRDEP_LENGTH_BITS)

#define JSSTRDEP(str)               ((JSDependentString *)(str))

/* Start offset into the base; a prefix string stores no start (always 0). */
#define JSSTRDEP_START(str)         (JSSTRING_IS_PREFIX(str) ? 0              \
                                     : ((JSSTRDEP(str)->length                \
                                         >> JSSTRDEP_START_SHIFT)             \
                                        & JSSTRDEP_START_MASK))

/* Length of a dependent string; a prefix uses all JSSTRING_LENGTH_BITS. */
#define JSSTRDEP_LENGTH(str)        (JSSTRDEP(str)->length                    \
                                     & (JSSTRING_IS_PREFIX(str)               \
                                        ? JSSTRING_LENGTH_MASK                \
                                        : JSSTRDEP_LENGTH_MASK))

/*
 * Pack off and len into str->length and mark str dependent.  The offset is
 * widened to size_t before shifting: JSSTRDEP_START_SHIFT can be as large as
 * the width of int (31 when size_t is 64 bits wide with 8-bit bytes), so
 * shifting a narrower argument in its own type would drop the start bits.
 * The caller must ensure off and len fit in their respective fields.
 */
#define JSSTRDEP_SET_START_AND_LENGTH(str,off,len)                            \
    (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT                              \
                           | ((size_t)(off) << JSSTRDEP_START_SHIFT)          \
                           | (len))

/* Mark str as a dependent prefix (start 0) of its base, len chars long. */
#define JSPREFIX_SET_LENGTH(str,len)                                          \
    (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX | (len))

#define JSSTRDEP_BASE(str)          (JSSTRDEP(str)->base)
#define JSSTRDEP_SET_BASE(str,bstr) (JSSTRDEP(str)->base = (bstr))
#define JSPREFIX_BASE(str)          JSSTRDEP_BASE(str)
#define JSPREFIX_SET_BASE(str,bstr) JSSTRDEP_SET_BASE(str,bstr)

/*
 * Chars of a dependent string.  When the base is itself dependent the work
 * is handed to js_GetDependentStringChars; otherwise the chars start at
 * base->chars + start.
 */
#define JSSTRDEP_CHARS(str)                                                   \
    (JSSTRING_IS_DEPENDENT(JSSTRDEP_BASE(str))                                \
     ? js_GetDependentStringChars(str)                                        \
     : JSSTRDEP_BASE(str)->chars + JSSTRDEP_START(str))
142:
143: extern size_t
144: js_MinimizeDependentStrings(JSString *str, int level, JSString **basep);
145:
146: extern jschar *
147: js_GetDependentStringChars(JSString *str);
148:
149: extern jschar *
150: js_GetStringChars(JSString *str);
151:
152: extern JSString *
153: js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
154:
155: extern const jschar *
156: js_UndependString(JSContext *cx, JSString *str);
157:
158: struct JSSubString {
159: size_t length;
160: const jschar *chars;
161: };
162:
163: extern jschar js_empty_ucstr[];
164: extern JSSubString js_EmptySubString;
165:
166: /* Unicode character attribute lookup tables. */
167: extern const uint8 js_X[];
168: extern const uint8 js_Y[];
169: extern const uint32 js_A[];
170:
/*
 * Enumerated Unicode general category types.  The values are what JS_CTYPE
 * yields, so they must not be renumbered; note that 17 is not assigned.
 */
typedef enum JSCharType {
    JSCT_UNASSIGNED             = 0,

    /* Letters. */
    JSCT_UPPERCASE_LETTER       = 1,
    JSCT_LOWERCASE_LETTER       = 2,
    JSCT_TITLECASE_LETTER       = 3,
    JSCT_MODIFIER_LETTER        = 4,
    JSCT_OTHER_LETTER           = 5,

    /* Marks. */
    JSCT_NON_SPACING_MARK       = 6,
    JSCT_ENCLOSING_MARK         = 7,
    JSCT_COMBINING_SPACING_MARK = 8,

    /* Numbers. */
    JSCT_DECIMAL_DIGIT_NUMBER   = 9,
    JSCT_LETTER_NUMBER          = 10,
    JSCT_OTHER_NUMBER           = 11,

    /* Separators. */
    JSCT_SPACE_SEPARATOR        = 12,
    JSCT_LINE_SEPARATOR         = 13,
    JSCT_PARAGRAPH_SEPARATOR    = 14,

    /* Control, format, private use and surrogates. */
    JSCT_CONTROL                = 15,
    JSCT_FORMAT                 = 16,
    JSCT_PRIVATE_USE            = 18,
    JSCT_SURROGATE              = 19,

    /* Punctuation. */
    JSCT_DASH_PUNCTUATION       = 20,
    JSCT_START_PUNCTUATION      = 21,
    JSCT_END_PUNCTUATION        = 22,
    JSCT_CONNECTOR_PUNCTUATION  = 23,
    JSCT_OTHER_PUNCTUATION      = 24,

    /* Symbols. */
    JSCT_MATH_SYMBOL            = 25,
    JSCT_CURRENCY_SYMBOL        = 26,
    JSCT_MODIFIER_SYMBOL        = 27,
    JSCT_OTHER_SYMBOL           = 28
} JSCharType;
202:
/*
 * Character classifying and mapping macros, based on java.lang.Character.
 * All of them evaluate c more than once; pass a side-effect-free expression.
 */

/* Attribute word for c: a two-level index through js_X and js_Y into js_A. */
#define JS_CCODE(c)     (js_A[js_Y[(js_X[(uint16)(c) >> 6] << 6) | ((c) & 0x3F)]])

/* Low five bits of the attribute word: the JSCharType of c. */
#define JS_CTYPE(c)     (JS_CCODE(c) & 0x1F)

/* Helpers: the set bit for one JSCharType, and "is c's type in this set?". */
#define JS_CTYPE_BIT(type)          (1 << (type))
#define JS_CTYPE_IN_SET(c,set)      (((set) >> JS_CTYPE(c)) & 1)

/* The five letter categories shared by the predicates below. */
#define JS_CTSET_LETTERS            (JS_CTYPE_BIT(JSCT_UPPERCASE_LETTER) |    \
                                     JS_CTYPE_BIT(JSCT_LOWERCASE_LETTER) |    \
                                     JS_CTYPE_BIT(JSCT_TITLECASE_LETTER) |    \
                                     JS_CTYPE_BIT(JSCT_MODIFIER_LETTER)  |    \
                                     JS_CTYPE_BIT(JSCT_OTHER_LETTER))

#define JS_ISALPHA(c)   JS_CTYPE_IN_SET(c, JS_CTSET_LETTERS)

#define JS_ISALNUM(c)   JS_CTYPE_IN_SET(c, JS_CTSET_LETTERS |                 \
                                JS_CTYPE_BIT(JSCT_DECIMAL_DIGIT_NUMBER))

/* A unicode letter, suitable for use in an identifier. */
#define JS_ISUC_LETTER(c)                                                     \
                        JS_CTYPE_IN_SET(c, JS_CTSET_LETTERS |                 \
                                JS_CTYPE_BIT(JSCT_LETTER_NUMBER))

/*
 * 'IdentifierPart' from ECMA grammar, is Unicode letter or
 * combining mark or digit or connector punctuation.
 */
#define JS_ISID_PART(c) JS_CTYPE_IN_SET(c, JS_CTSET_LETTERS |                 \
                                JS_CTYPE_BIT(JSCT_LETTER_NUMBER) |            \
                                JS_CTYPE_BIT(JSCT_NON_SPACING_MARK) |         \
                                JS_CTYPE_BIT(JSCT_COMBINING_SPACING_MARK) |   \
                                JS_CTYPE_BIT(JSCT_DECIMAL_DIGIT_NUMBER) |     \
                                JS_CTYPE_BIT(JSCT_CONNECTOR_PUNCTUATION))

/* Unicode control-format characters, ignored in input */
#define JS_ISFORMAT(c)  JS_CTYPE_IN_SET(c, JS_CTYPE_BIT(JSCT_FORMAT))

#define JS_ISWORD(c)    (JS_ISALNUM(c) || (c) == '_')

/* XXXbe unify on A/X/Y tbls, avoid ctype.h? */
#define JS_ISIDENT_START(c) (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$')
#define JS_ISIDENT(c)       (JS_ISID_PART(c) || (c) == '_' || (c) == '$')

#define JS_ISDIGIT(c)   (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)

/* XXXbe fs, etc. ? */
/* Space test: a three-bit field of the attribute word must equal 4. */
#define JS_ISSPACE(c)   ((JS_CCODE(c) & 0x00070000) == 0x00040000)
/* Only code points below 128 are ever handed to isprint. */
#define JS_ISPRINT(c)   ((c) < 128 && isprint(c))

#define JS_ISUPPER(c)   (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
#define JS_ISLOWER(c)   (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)

/*
 * Case mapping: a flag bit in the attribute word says whether a mapping
 * exists, and the signed value in bits 22 and up is the distance to it.
 */
#define JS_TOUPPER(c)   ((JS_CCODE(c) & 0x00100000)                           \
                         ? (c) - ((int32)JS_CCODE(c) >> 22)                   \
                         : (c))
#define JS_TOLOWER(c)   ((JS_CCODE(c) & 0x00200000)                           \
                         ? (c) + ((int32)JS_CCODE(c) >> 22)                   \
                         : (c))

#define JS_TOCTRL(c)    ((c) ^ 64)      /* XXX unsafe!  requires uppercase c */
269:
/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 *
 * The JS7_IS* predicates check (c) < 128 before calling into <ctype.h>.
 * JS7_UNDEC and JS7_UNHEX do no checking of their own: call them only on a
 * character that has already passed JS7_ISDEC or JS7_ISHEX respectively.
 * All of these may evaluate c more than once.
 */
#define JS7_ISDEC(c)    ((c) < 128 && isdigit(c))
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISHEX(c)    ((c) < 128 && isxdigit(c))
/* Whole expansion is parenthesized so the cast cannot be split off by the
   operators surrounding a use of the macro. */
#define JS7_UNHEX(c)    ((uintN)(isdigit(c) ? (c) - '0' : 10 + tolower(c) - 'a'))
#define JS7_ISLET(c)    ((c) < 128 && isalpha(c))
276:
277: /* Initialize truly global state associated with JS strings. */
278: extern JSBool
279: js_InitStringGlobals(void);
280:
281: extern void
282: js_FreeStringGlobals(void);
283:
284: extern void
285: js_PurgeDeflatedStringCache(JSString *str);
286:
287: /* Initialize per-runtime string state for the first context in the runtime. */
288: extern JSBool
289: js_InitRuntimeStringState(JSContext *cx);
290:
291: extern void
292: js_FinishRuntimeStringState(JSContext *cx);
293:
294: /* Initialize the String class, returning its prototype object. */
295: extern JSObject *
296: js_InitStringClass(JSContext *cx, JSObject *obj);
297:
/*
 * Constant C strings defined outside this header.
 * NOTE(review): judging by the identifiers these hold the names of the
 * corresponding global functions -- confirm against their definitions.
 */
extern const char   js_escape_str[];
extern const char   js_unescape_str[];
extern const char   js_uneval_str[];
extern const char   js_decodeURI_str[];
extern const char   js_encodeURI_str[];
extern const char   js_decodeURIComponent_str[];
extern const char   js_encodeURIComponent_str[];
305:
306: /* GC-allocate a string descriptor for the given malloc-allocated chars. */
307: extern JSString *
308: js_NewString(JSContext *cx, jschar *chars, size_t length, uintN gcflag);
309:
310: extern JSString *
311: js_NewDependentString(JSContext *cx, JSString *base, size_t start,
312: size_t length, uintN gcflag);
313:
314: /* Copy a counted string and GC-allocate a descriptor for it. */
315: extern JSString *
316: js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n, uintN gcflag);
317:
318: /* Copy a C string and GC-allocate a descriptor for it. */
319: extern JSString *
320: js_NewStringCopyZ(JSContext *cx, const jschar *s, uintN gcflag);
321:
322: /* Free the chars held by str when it is finalized by the GC. */
323: extern void
324: js_FinalizeString(JSContext *cx, JSString *str);
325:
326: extern void
327: js_FinalizeStringRT(JSRuntime *rt, JSString *str);
328:
329: /* Wrap a string value in a String object. */
330: extern JSObject *
331: js_StringToObject(JSContext *cx, JSString *str);
332:
333: /*
334: * Convert a value to a string, returning null after reporting an error,
335: * otherwise returning a new string reference.
336: */
337: extern JSString *
338: js_ValueToString(JSContext *cx, jsval v);
339:
340: /*
341: * Convert a value to its source expression, returning null after reporting
342: * an error, otherwise returning a new string reference.
343: */
344: extern JSString *
345: js_ValueToSource(JSContext *cx, jsval v);
346:
347: #ifdef HT_ENUMERATE_NEXT /* XXX don't require jshash.h */
348: /*
349: * Compute a hash function from str.
350: */
351: extern JSHashNumber
352: js_HashString(JSString *str);
353: #endif
354:
355: /*
356: * Return less than, equal to, or greater than zero depending on whether
357: * str1 is less than, equal to, or greater than str2.
358: */
359: extern intN
360: js_CompareStrings(JSString *str1, JSString *str2);
361:
362: /*
363: * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
364: * The patlen argument must be positive and no greater than BMH_PATLEN_MAX.
365: * The start argument tells where in text to begin the search.
366: *
367: * Return the index of pat in text, or -1 if not found.
368: */
369: #define BMH_CHARSET_SIZE 256 /* ISO-Latin-1 */
370: #define BMH_PATLEN_MAX 255 /* skip table element is uint8 */
371:
372: #define BMH_BAD_PATTERN (-2) /* return value if pat is not ISO-Latin-1 */
373:
374: extern jsint
375: js_BoyerMooreHorspool(const jschar *text, jsint textlen,
376: const jschar *pat, jsint patlen,
377: jsint start);
378:
379: extern size_t
380: js_strlen(const jschar *s);
381:
382: extern jschar *
383: js_strchr(const jschar *s, jschar c);
384:
385: extern jschar *
386: js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
387:
/*
 * Copy exactly n jschars from s to t with memcpy.  Unlike strncpy this
 * neither stops at a zero jschar nor pads, and t and s must not overlap.
 */
#define js_strncpy(t, s, n)     memcpy((t), (s), sizeof(jschar) * (n))
389:
390: /*
391: * Return s advanced past any Unicode white space characters.
392: */
393: extern const jschar *
394: js_SkipWhiteSpace(const jschar *s);
395:
396: /*
397: * Inflate bytes to JS chars and vice versa. Report out of memory via cx
398: * and return null on error, otherwise return the jschar or byte vector that
399: * was JS_malloc'ed.
400: */
401: extern jschar *
402: js_InflateString(JSContext *cx, const char *bytes, size_t length);
403:
404: extern char *
405: js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
406:
407: /*
408: * Inflate bytes to JS chars into a buffer.
409: * 'chars' must be large enough for 'length'+1 jschars.
410: */
411: extern void
412: js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length);
413:
414: /*
415: * Associate bytes with str in the deflated string cache, returning true on
416: * successful association, false on out of memory.
417: */
418: extern JSBool
419: js_SetStringBytes(JSString *str, char *bytes, size_t length);
420:
421: /*
422: * Find or create a deflated string cache entry for str that contains its
423: * characters chopped from Unicode code points into bytes.
424: */
425: extern char *
426: js_GetStringBytes(JSString *str);
427:
428: JSBool
429: js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
430: jsval *rval);
431:
432: JS_END_EXTERN_C
433:
434: #endif /* jsstr_h___ */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.