|
|
1.1 root 1: /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2: *
3: * ***** BEGIN LICENSE BLOCK *****
4: * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5: *
6: * The contents of this file are subject to the Mozilla Public License Version
7: * 1.1 (the "License"); you may not use this file except in compliance with
8: * the License. You may obtain a copy of the License at
9: * http://www.mozilla.org/MPL/
10: *
11: * Software distributed under the License is distributed on an "AS IS" basis,
12: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13: * for the specific language governing rights and limitations under the
14: * License.
15: *
16: * The Original Code is Mozilla Communicator client code, released
17: * March 31, 1998.
18: *
19: * The Initial Developer of the Original Code is
20: * Netscape Communications Corporation.
21: * Portions created by the Initial Developer are Copyright (C) 1998
22: * the Initial Developer. All Rights Reserved.
23: *
24: * Contributor(s):
25: *
26: * Alternatively, the contents of this file may be used under the terms of
27: * either of the GNU General Public License Version 2 or later (the "GPL"),
28: * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29: * in which case the provisions of the GPL or the LGPL are applicable instead
30: * of those above. If you wish to allow use of your version of this file only
31: * under the terms of either the GPL or the LGPL, and not to allow others to
32: * use your version of this file under the terms of the MPL, indicate your
33: * decision by deleting the provisions above and replace them with the notice
34: * and other provisions required by the GPL or the LGPL. If you do not delete
35: * the provisions above, a recipient may use your version of this file under
36: * the terms of any one of the MPL, the GPL or the LGPL.
37: *
38: * ***** END LICENSE BLOCK ***** */
39:
40: #ifndef jsstr_h___
41: #define jsstr_h___
42: /*
43: * JS string type implementation.
44: *
45: * A JS string is a counted array of unicode characters. To support handoff
46: * of API client memory, the chars are allocated separately from the length,
47: * necessitating a pointer after the count, to form a separately allocated
48: * string descriptor. String descriptors are GC'ed, while their chars are
49: * allocated from the malloc heap.
50: *
51: * When a string is treated as an object (by following it with . or []), the
52: * runtime wraps it with a JSObject whose valueOf method returns the unwrapped
53: * string descriptor.
54: */
55: #include <ctype.h>
56: #include "jspubtd.h"
57: #include "jsprvtd.h"
58: #include "jshash.h"
59:
60: JS_BEGIN_EXTERN_C
61:
62: /*
63: * The original GC-thing "string" type, a flat character string owned by its
64: * GC-thing descriptor. The chars member points to a vector having byte size
65: * (length + 1) * sizeof(jschar), terminated at index length by a zero jschar.
66: * The terminator is purely a backstop, in case the chars pointer flows out to
67: * native code that requires \u0000 termination.
68: *
69: * NB: Always use the JSSTRING_LENGTH and JSSTRING_CHARS accessor macros,
70: * unless you guard str->member uses with !JSSTRING_IS_DEPENDENT(str).
71: */
72: struct JSString {
73: size_t length;
74: jschar *chars;
75: };
76:
77: /*
78: * Overlay structure for a string that depends on another string's characters.
79: * Distinguished by the JSSTRFLAG_DEPENDENT bit being set in length. The base
80: * member may point to another dependent string if JSSTRING_CHARS has not been
81: * called yet. The length chars in a dependent string are stored starting at
82: * base->chars + start, and are not necessarily zero-terminated. If start is
83: * 0, it is not stored, length is a full size_t (minus the JSSTRFLAG_* bits in
84: * the high two positions), and the JSSTRFLAG_PREFIX flag is set.
85: */
86: struct JSDependentString {
87: size_t length;
88: JSString *base;
89: };
90:
/*
 * Flags kept in the high-order JSSTRFLAG_BITS bits of JSString.length.
 * JSSTRFLAG_SHIFT lifts a small flag value up past the length bits.
 */
#define JSSTRFLAG_BITS          2
#define JSSTRFLAG_SHIFT(flg)    ((size_t)(flg) << JSSTRING_LENGTH_BITS)
#define JSSTRFLAG_DEPENDENT     JSSTRFLAG_SHIFT(1)
#define JSSTRFLAG_PREFIX        JSSTRFLAG_SHIFT(2)
#define JSSTRFLAG_MASK          JSSTRFLAG_SHIFT(JS_BITMASK(JSSTRFLAG_BITS))
97:
/* Type inquiry and accessor macros usable on any JSString. */

/* size_t-wide single bit, and the mask of the n low-order bits. */
#define JSSTRING_BIT(n)             ((size_t)1 << (n))
#define JSSTRING_BITMASK(n)         (JSSTRING_BIT(n) - 1)

/* Width and mask of what remains of the length word below the flag bits. */
#define JSSTRING_LENGTH_BITS                                                  \
    (sizeof(size_t) * JS_BITS_PER_BYTE - JSSTRFLAG_BITS)
#define JSSTRING_LENGTH_MASK        JSSTRING_BITMASK(JSSTRING_LENGTH_BITS)

/* Flag tests: the result is nonzero exactly when flg is set in str. */
#define JSSTRING_HAS_FLAG(str,flg)  ((str)->length & (flg))
#define JSSTRING_IS_DEPENDENT(str)  JSSTRING_HAS_FLAG(str, JSSTRFLAG_DEPENDENT)
#define JSSTRING_IS_PREFIX(str)     JSSTRING_HAS_FLAG(str, JSSTRFLAG_PREFIX)

/*
 * Characters and length of any string, flat or dependent.  Both macros
 * mention str more than once, so str must be free of side effects.
 */
#define JSSTRING_CHARS(str)                                                   \
    (JSSTRING_IS_DEPENDENT(str) ? JSSTRDEP_CHARS(str) : (str)->chars)
#define JSSTRING_LENGTH(str)                                                  \
    (JSSTRING_IS_DEPENDENT(str) ? JSSTRDEP_LENGTH(str) : (str)->length)
113:
/*
 * Shift and mask constants for a JSDependentString length word.  The bits
 * below the flags are split in two: the low JSSTRDEP_LENGTH_BITS hold the
 * length and the JSSTRDEP_START_BITS directly above them hold the start.
 */
#define JSSTRDEP_LENGTH_BITS    (JSSTRING_LENGTH_BITS / 2)
#define JSSTRDEP_LENGTH_MASK    JSSTRING_BITMASK(JSSTRDEP_LENGTH_BITS)

#define JSSTRDEP_START_BITS     (JSSTRING_LENGTH_BITS-JSSTRDEP_LENGTH_BITS)
#define JSSTRDEP_START_SHIFT    JSSTRDEP_LENGTH_BITS
#define JSSTRDEP_START_MASK     JSSTRING_BITMASK(JSSTRDEP_START_BITS)
120:
/* View any string pointer as a dependent-string overlay. */
#define JSSTRDEP(str)           ((JSDependentString *)(str))

/*
 * Start offset of a dependent string: 0 for a prefix string (which stores no
 * start), otherwise the start field extracted from the length word.
 */
#define JSSTRDEP_START(str)                                                   \
    (JSSTRING_IS_PREFIX(str)                                                  \
     ? 0                                                                      \
     : ((JSSTRDEP(str)->length >> JSSTRDEP_START_SHIFT)                       \
        & JSSTRDEP_START_MASK))

/*
 * Length of a dependent string: a prefix string uses every non-flag bit,
 * any other dependent string only the low JSSTRDEP_LENGTH_BITS.
 */
#define JSSTRDEP_LENGTH(str)                                                  \
    (JSSTRDEP(str)->length & (JSSTRING_IS_PREFIX(str)                         \
                              ? JSSTRING_LENGTH_MASK                          \
                              : JSSTRDEP_LENGTH_MASK))
130:
/*
 * Mark str as dependent and pack its start offset and length into the
 * length word.
 *
 * off is widened to size_t before the shift.  Shifting in the argument's own
 * type would drop high bits (or overflow a signed int) whenever a caller
 * passes a type narrower than size_t, e.g. a 32-bit offset on a 64-bit
 * target where JSSTRDEP_START_SHIFT is 31.
 *
 * Neither value is masked here: the caller must ensure off fits in
 * JSSTRDEP_START_BITS and len in JSSTRDEP_LENGTH_BITS.
 */
#define JSSTRDEP_SET_START_AND_LENGTH(str,off,len)                            \
    (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT                              \
                           | ((size_t)(off) << JSSTRDEP_START_SHIFT)          \
                           | (len))

/*
 * Mark str as a dependent prefix (start 0, not stored) of length len.  The
 * caller must ensure len fits below the flag bits.
 */
#define JSPREFIX_SET_LENGTH(str,len)                                          \
    (JSSTRDEP(str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX | (len))
137:
/* Read or replace the base string of a dependent string. */
#define JSSTRDEP_BASE(str)          (JSSTRDEP(str)->base)
#define JSSTRDEP_SET_BASE(str,bstr) (JSSTRDEP(str)->base = (bstr))

/* The same operations under the prefix-string names. */
#define JSPREFIX_BASE(str)          JSSTRDEP_BASE(str)
#define JSPREFIX_SET_BASE(str,bstr) JSSTRDEP_SET_BASE(str,bstr)

/*
 * First character of a dependent string.  When the base is flat, this is
 * the base's chars advanced by the start offset.  When the base is itself
 * dependent, the work is handed to js_GetDependentStringChars.
 */
#define JSSTRDEP_CHARS(str)                                                   \
    (JSSTRING_IS_DEPENDENT(JSSTRDEP_BASE(str))                                \
     ? js_GetDependentStringChars(str)                                        \
     : &JSSTRDEP_BASE(str)->chars[JSSTRDEP_START(str)])
147:
148: extern size_t
149: js_MinimizeDependentStrings(JSString *str, int level, JSString **basep);
150:
151: extern jschar *
152: js_GetDependentStringChars(JSString *str);
153:
154: extern jschar *
155: js_GetStringChars(JSString *str);
156:
157: extern JSString *
158: js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
159:
160: extern const jschar *
161: js_UndependString(JSContext *cx, JSString *str);
162:
163: struct JSSubString {
164: size_t length;
165: const jschar *chars;
166: };
167:
168: extern jschar js_empty_ucstr[];
169: extern JSSubString js_EmptySubString;
170:
171: /* Unicode character attribute lookup tables. */
172: extern const uint8 js_X[];
173: extern const uint8 js_Y[];
174: extern const uint32 js_A[];
175:
/*
 * Unicode general category types.  The numeric values are significant: the
 * classification macros compare JS_CTYPE(c) against them and use them as bit
 * positions.  No enumerator has the value 17.
 */
typedef enum JSCharType {
    JSCT_UNASSIGNED                 =  0,

    /* letters */
    JSCT_UPPERCASE_LETTER           =  1,
    JSCT_LOWERCASE_LETTER           =  2,
    JSCT_TITLECASE_LETTER           =  3,
    JSCT_MODIFIER_LETTER            =  4,
    JSCT_OTHER_LETTER               =  5,

    /* marks */
    JSCT_NON_SPACING_MARK           =  6,
    JSCT_ENCLOSING_MARK             =  7,
    JSCT_COMBINING_SPACING_MARK     =  8,

    /* numbers */
    JSCT_DECIMAL_DIGIT_NUMBER       =  9,
    JSCT_LETTER_NUMBER              = 10,
    JSCT_OTHER_NUMBER               = 11,

    /* separators */
    JSCT_SPACE_SEPARATOR            = 12,
    JSCT_LINE_SEPARATOR             = 13,
    JSCT_PARAGRAPH_SEPARATOR        = 14,

    /* control, format, private use, surrogate (17 is skipped) */
    JSCT_CONTROL                    = 15,
    JSCT_FORMAT                     = 16,
    JSCT_PRIVATE_USE                = 18,
    JSCT_SURROGATE                  = 19,

    /* punctuation */
    JSCT_DASH_PUNCTUATION           = 20,
    JSCT_START_PUNCTUATION          = 21,
    JSCT_END_PUNCTUATION            = 22,
    JSCT_CONNECTOR_PUNCTUATION      = 23,
    JSCT_OTHER_PUNCTUATION          = 24,

    /* symbols */
    JSCT_MATH_SYMBOL                = 25,
    JSCT_CURRENCY_SYMBOL            = 26,
    JSCT_MODIFIER_SYMBOL            = 27,
    JSCT_OTHER_SYMBOL               = 28
} JSCharType;
207:
/*
 * Character classifying and mapping macros, based on java.lang.Character.
 *
 * JS_CCODE(c) fetches the 32-bit attribute word for c.  The bits of c above
 * the low six index js_X, that entry shifted left six and OR'ed with the low
 * six bits of c indexes js_Y, and that entry indexes js_A.
 * JS_CTYPE(c) is the low five bits of the word, a JSCharType value.
 *
 * c is evaluated more than once.
 */
#define JS_CCODE(c)                                                           \
    (js_A[js_Y[(js_X[(uint16)(c) >> 6] << 6) | ((c) & 0x3F)]])
#define JS_CTYPE(c)     (JS_CCODE(c) & 0x1F)
211:
/*
 * Category predicates.  Each builds a bit set in which bit N stands for the
 * JSCharType with value N, then picks out the bit selected by JS_CTYPE(c).
 * Every predicate yields 1 or 0.
 */
#define JS_CTYPE_BIT(type)          (1 << (type))
#define JS_CTYPE_IN_SET(c,set)      (((set) >> JS_CTYPE(c)) & 1)

/* The five letter categories common to the predicates below. */
#define JS_CTYPE_LETTER_SET                                                   \
    (JS_CTYPE_BIT(JSCT_UPPERCASE_LETTER) |                                    \
     JS_CTYPE_BIT(JSCT_LOWERCASE_LETTER) |                                    \
     JS_CTYPE_BIT(JSCT_TITLECASE_LETTER) |                                    \
     JS_CTYPE_BIT(JSCT_MODIFIER_LETTER)  |                                    \
     JS_CTYPE_BIT(JSCT_OTHER_LETTER))

#define JS_ISALPHA(c)   JS_CTYPE_IN_SET(c, JS_CTYPE_LETTER_SET)

#define JS_ISALNUM(c)                                                         \
    JS_CTYPE_IN_SET(c, JS_CTYPE_LETTER_SET |                                  \
                       JS_CTYPE_BIT(JSCT_DECIMAL_DIGIT_NUMBER))

/* A Unicode letter, suitable for use in an identifier. */
#define JS_ISUC_LETTER(c)                                                     \
    JS_CTYPE_IN_SET(c, JS_CTYPE_LETTER_SET |                                  \
                       JS_CTYPE_BIT(JSCT_LETTER_NUMBER))

/*
 * 'IdentifierPart' from the ECMA grammar: a Unicode letter, combining mark,
 * digit, or connector punctuation.
 */
#define JS_ISID_PART(c)                                                       \
    JS_CTYPE_IN_SET(c, JS_CTYPE_LETTER_SET |                                  \
                       JS_CTYPE_BIT(JSCT_LETTER_NUMBER) |                     \
                       JS_CTYPE_BIT(JSCT_NON_SPACING_MARK) |                  \
                       JS_CTYPE_BIT(JSCT_COMBINING_SPACING_MARK) |            \
                       JS_CTYPE_BIT(JSCT_DECIMAL_DIGIT_NUMBER) |              \
                       JS_CTYPE_BIT(JSCT_CONNECTOR_PUNCTUATION))

/* Unicode control-format characters, ignored in input. */
#define JS_ISFORMAT(c)  JS_CTYPE_IN_SET(c, JS_CTYPE_BIT(JSCT_FORMAT))
254:
/*
 * Per ECMA-262 15.10.2.6, these characters are the only ones that make up a
 * "word" as far as a RegExp is concerned.  If we want a Unicode-friendlier
 * definition of "word", this macro should be renamed to something regexp-y.
 *
 * The guard accepts c only when no bit above the low seven is set.  That
 * rejects negative arguments as well as values of 128 and above, so
 * isalnum() is never handed a value outside its defined domain.
 *
 * Yields 1 or 0.  c is evaluated more than once.
 */
#define JS_ISWORD(c)                                                          \
    ((((c) & ~0x7F) == 0) && (isalnum(c) || (c) == '_'))
261:
/* XXXbe unify on A/X/Y tbls, avoid ctype.h? */

/* May c begin an identifier: a Unicode letter, '_' or '$'. */
#define JS_ISIDENT_START(c)                                                   \
    (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$')

/* May c continue an identifier: an IdentifierPart character, '_' or '$'. */
#define JS_ISIDENT(c)                                                         \
    (JS_ISID_PART(c) || (c) == '_' || (c) == '$')

/* Is c in the decimal-digit-number category. */
#define JS_ISDIGIT(c)                                                         \
    (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
267:
/* XXXbe fs, etc. ? */

/* White space is decided by a three-bit field of the attribute word. */
#define JS_ISSPACE(c)   ((JS_CCODE(c) & 0x00070000) == 0x00040000)

/*
 * Printable 7-bit character.  The guard accepts c only when no bit above the
 * low seven is set, so negative arguments never reach isprint(), whose
 * behaviour is undefined for them.  Yields 1 or 0.  c is evaluated more than
 * once.
 */
#define JS_ISPRINT(c)   ((((c) & ~0x7F) == 0) && isprint(c))
271:
/* Case tests: an exact match on the character's category. */
#define JS_ISUPPER(c)   (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
#define JS_ISLOWER(c)   (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)

/*
 * Case mapping.  A flag bit in the attribute word (0x00100000 for upper,
 * 0x00200000 for lower) says whether a mapping applies.  When it does, the
 * attribute word shifted right 22 as an int32 is subtracted from c to map up
 * and added to c to map down.  Otherwise c comes back unchanged.  The result
 * is converted to jschar.  c is evaluated more than once.
 */
#define JS_TOUPPER(c)                                                         \
    ((jschar) ((JS_CCODE(c) & 0x00100000)                                     \
               ? (c) - ((int32)JS_CCODE(c) >> 22)                             \
               : (c)))

#define JS_TOLOWER(c)                                                         \
    ((jschar) ((JS_CCODE(c) & 0x00200000)                                     \
               ? (c) + ((int32)JS_CCODE(c) >> 22)                             \
               : (c)))
281:
/* Flip bit 6 of c, e.g. 'A' -> 1. */
#define JS_TOCTRL(c)    ((c) ^ 64)      /* XXX unsafe! requires uppercase c */

/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 *
 * The JS7_IS* tests accept c only when no bit above the low seven is set.
 * That rejects negative arguments as well as values of 128 and above, so the
 * <ctype.h> function is never called with a value outside its defined
 * domain.  Each test yields 1 or 0.
 *
 * JS7_UNDEC and JS7_UNHEX do no validation: call them only after the
 * matching JS7_ISDEC / JS7_ISHEX test has succeeded.  Every macro here
 * evaluates c more than once.
 */
#define JS7_ISDEC(c)    ((((c) & ~0x7F) == 0) && isdigit(c))
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISHEX(c)    ((((c) & ~0x7F) == 0) && isxdigit(c))
#define JS7_UNHEX(c)                                                          \
    ((uintN)(isdigit(c) ? (c) - '0' : 10 + tolower(c) - 'a'))
#define JS7_ISLET(c)    ((((c) & ~0x7F) == 0) && isalpha(c))
290:
291: /* Initialize truly global state associated with JS strings. */
292: extern JSBool
293: js_InitStringGlobals(void);
294:
295: extern void
296: js_FreeStringGlobals(void);
297:
298: extern void
299: js_PurgeDeflatedStringCache(JSString *str);
300:
301: /* Initialize per-runtime string state for the first context in the runtime. */
302: extern JSBool
303: js_InitRuntimeStringState(JSContext *cx);
304:
305: extern void
306: js_FinishRuntimeStringState(JSContext *cx);
307:
308: /* Initialize the String class, returning its prototype object. */
309: extern JSObject *
310: js_InitStringClass(JSContext *cx, JSObject *obj);
311:
/*
 * Constant C strings defined outside this header.
 * NOTE(review): judging by the identifiers these hold the names of the
 * corresponding global functions; confirm at the definitions.
 */
extern const char   js_escape_str[];
extern const char   js_unescape_str[];
extern const char   js_uneval_str[];
extern const char   js_decodeURI_str[];
extern const char   js_encodeURI_str[];
extern const char   js_decodeURIComponent_str[];
extern const char   js_encodeURIComponent_str[];
319:
320: /* GC-allocate a string descriptor for the given malloc-allocated chars. */
321: extern JSString *
322: js_NewString(JSContext *cx, jschar *chars, size_t length, uintN gcflag);
323:
324: extern JSString *
325: js_NewDependentString(JSContext *cx, JSString *base, size_t start,
326: size_t length, uintN gcflag);
327:
328: /* Copy a counted string and GC-allocate a descriptor for it. */
329: extern JSString *
330: js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n, uintN gcflag);
331:
332: /* Copy a C string and GC-allocate a descriptor for it. */
333: extern JSString *
334: js_NewStringCopyZ(JSContext *cx, const jschar *s, uintN gcflag);
335:
336: /* Free the chars held by str when it is finalized by the GC. */
337: extern void
338: js_FinalizeString(JSContext *cx, JSString *str);
339:
340: extern void
341: js_FinalizeStringRT(JSRuntime *rt, JSString *str);
342:
343: /* Wrap a string value in a String object. */
344: extern JSObject *
345: js_StringToObject(JSContext *cx, JSString *str);
346:
347: /*
348: * Convert a value to a string, returning null after reporting an error,
349: * otherwise returning a new string reference.
350: */
351: extern JSString *
352: js_ValueToString(JSContext *cx, jsval v);
353:
354: /*
355: * Convert a value to its source expression, returning null after reporting
356: * an error, otherwise returning a new string reference.
357: */
358: extern JSString *
359: js_ValueToSource(JSContext *cx, jsval v);
360:
361: #ifdef HT_ENUMERATE_NEXT /* XXX don't require jshash.h */
362: /*
363: * Compute a hash function from str.
364: */
365: extern JSHashNumber
366: js_HashString(JSString *str);
367: #endif
368:
369: /*
370: * Return less than, equal to, or greater than zero depending on whether
371: * str1 is less than, equal to, or greater than str2.
372: */
373: extern intN
374: js_CompareStrings(JSString *str1, JSString *str2);
375:
376: /*
377: * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
378: * The patlen argument must be positive and no greater than BMH_PATLEN_MAX.
379: * The start argument tells where in text to begin the search.
380: *
381: * Return the index of pat in text, or -1 if not found.
382: */
383: #define BMH_CHARSET_SIZE 256 /* ISO-Latin-1 */
384: #define BMH_PATLEN_MAX 255 /* skip table element is uint8 */
385:
386: #define BMH_BAD_PATTERN (-2) /* return value if pat is not ISO-Latin-1 */
387:
388: extern jsint
389: js_BoyerMooreHorspool(const jschar *text, jsint textlen,
390: const jschar *pat, jsint patlen,
391: jsint start);
392:
393: extern size_t
394: js_strlen(const jschar *s);
395:
396: extern jschar *
397: js_strchr(const jschar *s, jschar c);
398:
399: extern jschar *
400: js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
401:
/*
 * Copy exactly n jschars from s to t with memcpy.  Unlike strncpy it neither
 * stops at a zero jschar nor pads, and t and s must not overlap.  The user
 * of this macro needs a declaration of memcpy (<string.h>) in scope.
 */
#define js_strncpy(t, s, n)     memcpy((t), (s), sizeof(jschar) * (n))
403:
404: /*
405: * Return s advanced past any Unicode white space characters.
406: */
407: extern const jschar *
408: js_SkipWhiteSpace(const jschar *s);
409:
410: /*
411: * Inflate bytes to JS chars and vice versa. Report out of memory via cx
412: * and return null on error, otherwise return the jschar or byte vector that
413: * was JS_malloc'ed.
414: */
415: extern jschar *
416: js_InflateString(JSContext *cx, const char *bytes, size_t length);
417:
418: extern char *
419: js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
420:
421: /*
422: * Inflate bytes to JS chars into a buffer.
423: * 'chars' must be large enough for 'length'+1 jschars.
424: */
425: extern void
426: js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length);
427:
428: /*
429: * Associate bytes with str in the deflated string cache, returning true on
430: * successful association, false on out of memory.
431: */
432: extern JSBool
433: js_SetStringBytes(JSString *str, char *bytes, size_t length);
434:
435: /*
436: * Find or create a deflated string cache entry for str that contains its
437: * characters chopped from Unicode code points into bytes.
438: */
439: extern char *
440: js_GetStringBytes(JSString *str);
441:
442: JSBool
443: js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
444: jsval *rval);
445:
446: JS_END_EXTERN_C
447:
448: #endif /* jsstr_h___ */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.