Subversion Repositories Open64

[/] [sim/] [fsim/] [shell/] [tclsh/] [generic/] [tclRegexp.c] - Blame information for rev 2072

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2072 malin
/*
2
 * tclRegexp.c --
3
 *
4
 *  This file contains the public interfaces to the Tcl regular
5
 *  expression mechanism.
6
 *
7
 * Copyright (c) 1998 by Sun Microsystems, Inc.
8
 * Copyright (c) 1998-1999 by Scriptics Corporation.
9
 *
10
 * See the file "license.terms" for information on usage and redistribution
11
 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
12
 *
13
 * RCS: @(#) $Id: tclRegexp.c,v 1.14.4.2 2006/04/07 01:14:28 hobbs Exp $
14
 */
15
 
16
#include "tclInt.h"
17
#include "tclPort.h"
18
#include "tclRegexp.h"
19
 
20
/*
21
 *----------------------------------------------------------------------
22
 * The routines in this file use Henry Spencer's regular expression
23
 * package contained in the following additional source files:
24
 *
25
 *  regc_color.c  regc_cvec.c regc_lex.c
26
 *  regc_nfa.c  regcomp.c regcustom.h
27
 *  rege_dfa.c  regerror.c  regerrs.h
28
 *  regex.h   regexec.c regfree.c
29
 *  regfronts.c regguts.h
30
 *
31
 * Copyright (c) 1998 Henry Spencer.  All rights reserved.
32
 *
33
 * Development of this software was funded, in part, by Cray Research Inc.,
34
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
35
 * Corporation, none of whom are responsible for the results.  The author
36
 * thanks all of them.
37
 *
38
 * Redistribution and use in source and binary forms -- with or without
39
 * modification -- are permitted for any purpose, provided that
40
 * redistributions in source form retain this entire copyright notice and
41
 * indicate the origin and nature of any modifications.
42
 *
43
 * I'd appreciate being given credit for this package in the documentation
44
 * of software which uses it, but that is not a requirement.
45
 *
46
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
47
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
48
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
49
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
50
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
51
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
52
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
53
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
54
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
55
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56
 *
57
 * *** NOTE: this code has been altered slightly for use in Tcl: ***
58
 * *** 1. Names have been changed, e.g. from re_comp to    ***
59
 * ***    TclRegComp, to avoid clashes with other      ***
60
 * ***    regexp implementations used by applications.     ***
61
 */
62
 
63
/*
64
 * Thread local storage used to maintain a per-thread cache of compiled
65
 * regular expressions.
66
 */
67
 
68
#define NUM_REGEXPS 30
69
 
70
typedef struct ThreadSpecificData {
71
    int initialized;    /* Set to 1 when the module is initialized. */
72
    char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
73
         * regular expression patterns.  NULL
74
         * means that this slot isn't used.
75
         * Malloc-ed. */
76
    int patLengths[NUM_REGEXPS];/* Number of non-null characters in
77
         * corresponding entry in patterns.
78
         * -1 means entry isn't used. */
79
    struct TclRegexp *regexps[NUM_REGEXPS];
80
        /* Compiled forms of above strings.  Also
81
         * malloc-ed, or NULL if not in use yet. */
82
} ThreadSpecificData;
83
 
84
static Tcl_ThreadDataKey dataKey;
85
 
86
/*
87
 * Declarations for functions used only in this file.
88
 */
89
 
90
static TclRegexp *  CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
91
          CONST char *pattern, int length, int flags));
92
static void   DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
93
          Tcl_Obj *copyPtr));
94
static void   FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
95
static void   FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
96
static void   FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
97
static int    RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
98
          Tcl_RegExp re, CONST Tcl_UniChar *uniString,
99
          int numChars, int nmatches, int flags));
100
static int    SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
101
          Tcl_Obj *objPtr));
102
 
103
/*
104
 * The regular expression Tcl object type.  This serves as a cache
105
 * of the compiled form of the regular expression.
106
 */
107
 
108
static Tcl_ObjType tclRegexpType = {
109
    "regexp",       /* name */
110
    FreeRegexpInternalRep,    /* freeIntRepProc */
111
    DupRegexpInternalRep,   /* dupIntRepProc */
112
    NULL,       /* updateStringProc */
113
    SetRegexpFromAny      /* setFromAnyProc */
114
};
115
 
116
 
117
/*
118
 *----------------------------------------------------------------------
119
 *
120
 * Tcl_RegExpCompile --
121
 *
122
 *  Compile a regular expression into a form suitable for fast
123
 *  matching.  This procedure is DEPRECATED in favor of the
124
 *  object version of the command.
125
 *
126
 * Results:
127
 *  The return value is a pointer to the compiled form of string,
128
 *  suitable for passing to Tcl_RegExpExec.  This compiled form
129
 *  is only valid up until the next call to this procedure, so
130
 *  don't keep these around for a long time!  If an error occurred
131
 *  while compiling the pattern, then NULL is returned and an error
132
 *  message is left in the interp's result.
133
 *
134
 * Side effects:
135
 *  Updates the cache of compiled regexps.
136
 *
137
 *----------------------------------------------------------------------
138
 */
139
 
140
Tcl_RegExp
141
Tcl_RegExpCompile(interp, string)
142
    Tcl_Interp *interp;   /* For use in error reporting and
143
         * to access the interp regexp cache. */
144
    CONST char *string;   /* String for which to produce
145
         * compiled regular expression. */
146
{
147
    return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
148
      REG_ADVANCED);
149
}
150
 
151
/*
152
 *----------------------------------------------------------------------
153
 *
154
 * Tcl_RegExpExec --
155
 *
156
 *  Execute the regular expression matcher using a compiled form
157
 *  of a regular expression and save information about any match
158
 *  that is found.
159
 *
160
 * Results:
161
 *  If an error occurs during the matching operation then -1
162
 *  is returned and the interp's result contains an error message.
163
 *  Otherwise the return value is 1 if a matching range is
164
 *  found and 0 if there is no matching range.
165
 *
166
 * Side effects:
167
 *  None.
168
 *
169
 *----------------------------------------------------------------------
170
 */
171
 
172
int
173
Tcl_RegExpExec(interp, re, string, start)
174
    Tcl_Interp *interp;   /* Interpreter to use for error reporting. */
175
    Tcl_RegExp re;    /* Compiled regular expression;  must have
176
         * been returned by previous call to
177
         * Tcl_GetRegExpFromObj. */
178
    CONST char *string;   /* String against which to match re. */
179
    CONST char *start;    /* If string is part of a larger string,
180
         * this identifies beginning of larger
181
         * string, so that "^" won't match. */
182
{
183
    int flags, result, numChars;
184
    TclRegexp *regexp = (TclRegexp *)re;
185
    Tcl_DString ds;
186
    CONST Tcl_UniChar *ustr;
187
 
188
    /*
189
     * If the starting point is offset from the beginning of the buffer,
190
     * then we need to tell the regexp engine not to match "^".
191
     */
192
 
193
    if (string > start) {
194
  flags = REG_NOTBOL;
195
    } else {
196
  flags = 0;
197
    }
198
 
199
    /*
200
     * Remember the string for use by Tcl_RegExpRange().
201
     */
202
 
203
    regexp->string = string;
204
    regexp->objPtr = NULL;
205
 
206
    /*
207
     * Convert the string to Unicode and perform the match.
208
     */
209
 
210
    Tcl_DStringInit(&ds);
211
    ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
212
    numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
213
    result = RegExpExecUniChar(interp, re, ustr, numChars,
214
      -1 /* nmatches */, flags);
215
    Tcl_DStringFree(&ds);
216
 
217
    return result;
218
}
219
 
220
/*
221
 *---------------------------------------------------------------------------
222
 *
223
 * Tcl_RegExpRange --
224
 *
225
 *  Returns pointers describing the range of a regular expression match,
226
 *  or one of the subranges within the match.
227
 *
228
 * Results:
229
 *  The variables at *startPtr and *endPtr are modified to hold the
230
 *  addresses of the endpoints of the range given by index.  If the
231
 *  specified range doesn't exist then NULLs are returned.
232
 *
233
 * Side effects:
234
 *  None.
235
 *
236
 *---------------------------------------------------------------------------
237
 */
238
 
239
void
240
Tcl_RegExpRange(re, index, startPtr, endPtr)
241
    Tcl_RegExp re;    /* Compiled regular expression that has
242
         * been passed to Tcl_RegExpExec. */
243
    int index;      /* 0 means give the range of the entire
244
         * match, > 0 means give the range of
245
         * a matching subrange. */
246
    CONST char **startPtr;  /* Store address of first character in
247
         * (sub-) range here. */
248
    CONST char **endPtr;  /* Store address of character just after last
249
         * in (sub-) range here. */
250
{
251
    TclRegexp *regexpPtr = (TclRegexp *) re;
252
    CONST char *string;
253
 
254
    if ((size_t) index > regexpPtr->re.re_nsub) {
255
  *startPtr = *endPtr = NULL;
256
    } else if (regexpPtr->matches[index].rm_so < 0) {
257
  *startPtr = *endPtr = NULL;
258
    } else {
259
  if (regexpPtr->objPtr) {
260
      string = Tcl_GetString(regexpPtr->objPtr);
261
  } else {
262
      string = regexpPtr->string;
263
  }
264
  *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
265
  *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
266
    }
267
}
268
 
269
/*
270
 *---------------------------------------------------------------------------
271
 *
272
 * RegExpExecUniChar --
273
 *
274
 *  Execute the regular expression matcher using a compiled form of a
275
 *  regular expression and save information about any match that is
276
 *  found.
277
 *
278
 * Results:
279
 *  If an error occurs during the matching operation then -1 is
280
 *  returned and an error message is left in interp's result.
281
 *  Otherwise the return value is 1 if a matching range was found or
282
 *  0 if there was no matching range.
283
 *
284
 * Side effects:
285
 *  None.
286
 *
287
 *----------------------------------------------------------------------
288
 */
289
 
290
static int
291
RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
292
    Tcl_Interp *interp;   /* Interpreter to use for error reporting. */
293
    Tcl_RegExp re;    /* Compiled regular expression; returned by
294
         * a previous call to Tcl_GetRegExpFromObj */
295
    CONST Tcl_UniChar *wString; /* String against which to match re. */
296
    int numChars;   /* Length of Tcl_UniChar string (must
297
         * be >= 0). */
298
    int nmatches;   /* How many subexpression matches (counting
299
         * the whole match as subexpression 0) are
300
         * of interest.  -1 means "don't know". */
301
    int flags;      /* Regular expression flags. */
302
{
303
    int status;
304
    TclRegexp *regexpPtr = (TclRegexp *) re;
305
    size_t last = regexpPtr->re.re_nsub + 1;
306
    size_t nm = last;
307
 
308
    if (nmatches >= 0 && (size_t) nmatches < nm) {
309
  nm = (size_t) nmatches;
310
    }
311
 
312
    status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
313
      &regexpPtr->details, nm, regexpPtr->matches, flags);
314
 
315
    /*
316
     * Check for errors.
317
     */
318
 
319
    if (status != REG_OKAY) {
320
  if (status == REG_NOMATCH) {
321
      return 0;
322
  }
323
  if (interp != NULL) {
324
      TclRegError(interp, "error while matching regular expression: ",
325
        status);
326
  }
327
  return -1;
328
    }
329
    return 1;
330
}
331
 
332
/*
333
 *---------------------------------------------------------------------------
334
 *
335
 * TclRegExpRangeUniChar --
336
 *
337
 *  Returns pointers describing the range of a regular expression match,
338
 *  or one of the subranges within the match, or the hypothetical range
339
 *  represented by the rm_extend field of the rm_detail_t.
340
 *
341
 * Results:
342
 *  The variables at *startPtr and *endPtr are modified to hold the
343
 *  offsets of the endpoints of the range given by index.  If the
344
 *  specified range doesn't exist then -1s are supplied.
345
 *
346
 * Side effects:
347
 *  None.
348
 *
349
 *---------------------------------------------------------------------------
350
 */
351
 
352
void
353
TclRegExpRangeUniChar(re, index, startPtr, endPtr)
354
    Tcl_RegExp re;    /* Compiled regular expression that has
355
         * been passed to Tcl_RegExpExec. */
356
    int index;      /* 0 means give the range of the entire
357
         * match, > 0 means give the range of
358
         * a matching subrange, -1 means the
359
         * range of the rm_extend field. */
360
    int *startPtr;    /* Store address of first character in
361
         * (sub-) range here. */
362
    int *endPtr;    /* Store address of character just after last
363
         * in (sub-) range here. */
364
{
365
    TclRegexp *regexpPtr = (TclRegexp *) re;
366
 
367
    if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
368
  *startPtr = regexpPtr->details.rm_extend.rm_so;
369
  *endPtr = regexpPtr->details.rm_extend.rm_eo;
370
    } else if ((size_t) index > regexpPtr->re.re_nsub) {
371
  *startPtr = -1;
372
  *endPtr = -1;
373
    } else {
374
  *startPtr = regexpPtr->matches[index].rm_so;
375
  *endPtr = regexpPtr->matches[index].rm_eo;
376
    }
377
}
378
 
379
/*
380
 *----------------------------------------------------------------------
381
 *
382
 * Tcl_RegExpMatch --
383
 *
384
 *  See if a string matches a regular expression.
385
 *
386
 * Results:
387
 *  If an error occurs during the matching operation then -1
388
 *  is returned and the interp's result contains an error message.
389
 *  Otherwise the return value is 1 if "string" matches "pattern"
390
 *  and 0 otherwise.
391
 *
392
 * Side effects:
393
 *  None.
394
 *
395
 *----------------------------------------------------------------------
396
 */
397
 
398
int
399
Tcl_RegExpMatch(interp, string, pattern)
400
    Tcl_Interp *interp;   /* Used for error reporting. May be NULL. */
401
    CONST char *string;   /* String. */
402
    CONST char *pattern;  /* Regular expression to match against
403
         * string. */
404
{
405
    Tcl_RegExp re;
406
 
407
    re = Tcl_RegExpCompile(interp, pattern);
408
    if (re == NULL) {
409
  return -1;
410
    }
411
    return Tcl_RegExpExec(interp, re, string, string);
412
}
413
 
414
/*
415
 *----------------------------------------------------------------------
416
 *
417
 * Tcl_RegExpExecObj --
418
 *
419
 *  Execute a precompiled regexp against the given object.
420
 *
421
 * Results:
422
 *  If an error occurs during the matching operation then -1
423
 *  is returned and the interp's result contains an error message.
424
 *  Otherwise the return value is 1 if "string" matches "pattern"
425
 *  and 0 otherwise.
426
 *
427
 * Side effects:
428
 *  Converts the object to a Unicode object.
429
 *
430
 *----------------------------------------------------------------------
431
 */
432
 
433
int
434
Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
435
    Tcl_Interp *interp;   /* Interpreter to use for error reporting. */
436
    Tcl_RegExp re;    /* Compiled regular expression;  must have
437
         * been returned by previous call to
438
         * Tcl_GetRegExpFromObj. */
439
    Tcl_Obj *objPtr;    /* String against which to match re. */
440
    int offset;     /* Character index that marks where matching
441
         * should begin. */
442
    int nmatches;   /* How many subexpression matches (counting
443
         * the whole match as subexpression 0) are
444
         * of interest.  -1 means all of them. */
445
    int flags;      /* Regular expression execution flags. */
446
{
447
    TclRegexp *regexpPtr = (TclRegexp *) re;
448
    Tcl_UniChar *udata;
449
    int length;
450
 
451
    /*
452
     * Save the target object so we can extract strings from it later.
453
     */
454
 
455
    regexpPtr->string = NULL;
456
    regexpPtr->objPtr = objPtr;
457
 
458
    udata = Tcl_GetUnicodeFromObj(objPtr, &length);
459
 
460
    if (offset > length) {
461
  offset = length;
462
    }
463
    udata += offset;
464
    length -= offset;
465
 
466
    return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
467
}
468
 
469
/*
470
 *----------------------------------------------------------------------
471
 *
472
 * Tcl_RegExpMatchObj --
473
 *
474
 *  See if an object matches a regular expression.
475
 *
476
 * Results:
477
 *  If an error occurs during the matching operation then -1
478
 *  is returned and the interp's result contains an error message.
479
 *  Otherwise the return value is 1 if "string" matches "pattern"
480
 *  and 0 otherwise.
481
 *
482
 * Side effects:
483
 *  Changes the internal rep of the pattern and string objects.
484
 *
485
 *----------------------------------------------------------------------
486
 */
487
 
488
int
489
Tcl_RegExpMatchObj(interp, stringObj, patternObj)
490
    Tcl_Interp *interp;   /* Used for error reporting. May be NULL. */
491
    Tcl_Obj *stringObj;   /* Object containing the String to search. */
492
    Tcl_Obj *patternObj;  /* Regular expression to match against
493
         * string. */
494
{
495
    Tcl_RegExp re;
496
 
497
    re = Tcl_GetRegExpFromObj(interp, patternObj,
498
      TCL_REG_ADVANCED | TCL_REG_NOSUB);
499
    if (re == NULL) {
500
  return -1;
501
    }
502
    return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
503
 
504
}
505
 
506
/*
507
 *----------------------------------------------------------------------
508
 *
509
 * Tcl_RegExpGetInfo --
510
 *
511
 *  Retrieve information about the current match.
512
 *
513
 * Results:
514
 *  None.
515
 *
516
 * Side effects:
517
 *  None.
518
 *
519
 *----------------------------------------------------------------------
520
 */
521
 
522
void
523
Tcl_RegExpGetInfo(regexp, infoPtr)
524
    Tcl_RegExp regexp;    /* Pattern from which to get subexpressions. */
525
    Tcl_RegExpInfo *infoPtr;  /* Match information is stored here.  */
526
{
527
    TclRegexp *regexpPtr = (TclRegexp *) regexp;
528
 
529
    infoPtr->nsubs = regexpPtr->re.re_nsub;
530
    infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
531
    infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
532
}
533
 
534
/*
535
 *----------------------------------------------------------------------
536
 *
537
 * Tcl_GetRegExpFromObj --
538
 *
539
 *  Compile a regular expression into a form suitable for fast
540
 *  matching.  This procedure caches the result in a Tcl_Obj.
541
 *
542
 * Results:
543
 *  The return value is a pointer to the compiled form of string,
544
 *  suitable for passing to Tcl_RegExpExec.  If an error occurred
545
 *  while compiling the pattern, then NULL is returned and an error
546
 *  message is left in the interp's result.
547
 *
548
 * Side effects:
549
 *  Updates the native rep of the Tcl_Obj.
550
 *
551
 *----------------------------------------------------------------------
552
 */
553
 
554
Tcl_RegExp
555
Tcl_GetRegExpFromObj(interp, objPtr, flags)
556
    Tcl_Interp *interp;   /* For use in error reporting, and to access
557
         * the interp regexp cache. */
558
    Tcl_Obj *objPtr;    /* Object whose string rep contains regular
559
         * expression pattern.  Internal rep will be
560
         * changed to compiled form of this regular
561
         * expression. */
562
    int flags;      /* Regular expression compilation flags. */
563
{
564
    int length;
565
    Tcl_ObjType *typePtr;
566
    TclRegexp *regexpPtr;
567
    char *pattern;
568
 
569
    typePtr = objPtr->typePtr;
570
    regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
571
 
572
    if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
573
  pattern = Tcl_GetStringFromObj(objPtr, &length);
574
 
575
  regexpPtr = CompileRegexp(interp, pattern, length, flags);
576
  if (regexpPtr == NULL) {
577
      return NULL;
578
  }
579
 
580
  /*
581
   * Add a reference to the regexp so it will persist even if it is
582
   * pushed out of the current thread's regexp cache.  This reference
583
   * will be removed when the object's internal rep is freed.
584
   */
585
 
586
  regexpPtr->refCount++;
587
 
588
  /*
589
   * Free the old representation and set our type.
590
   */
591
 
592
  if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
593
      (*typePtr->freeIntRepProc)(objPtr);
594
  }
595
  objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
596
  objPtr->typePtr = &tclRegexpType;
597
    }
598
    return (Tcl_RegExp) regexpPtr;
599
}
600
 
601
/*
602
 *----------------------------------------------------------------------
603
 *
604
 * TclRegAbout --
605
 *
606
 *  Return information about a compiled regular expression.
607
 *
608
 * Results:
609
 *  The return value is -1 for failure, 0 for success, although at
610
 *  the moment there's nothing that could fail.  On success, a list
611
 *  is left in the interp's result:  first element is the subexpression
612
 *  count, second is a list of re_info bit names.
613
 *
614
 * Side effects:
615
 *  None.
616
 *
617
 *----------------------------------------------------------------------
618
 */
619
 
620
int
621
TclRegAbout(interp, re)
622
    Tcl_Interp *interp;   /* For use in variable assignment. */
623
    Tcl_RegExp re;    /* The compiled regular expression. */
624
{
625
    TclRegexp *regexpPtr = (TclRegexp *)re;
626
    char buf[TCL_INTEGER_SPACE];
627
    static struct infoname {
628
  int bit;
629
  char *text;
630
    } infonames[] = {
631
  {REG_UBACKREF,    "REG_UBACKREF"},
632
  {REG_ULOOKAHEAD,  "REG_ULOOKAHEAD"},
633
  {REG_UBOUNDS,   "REG_UBOUNDS"},
634
  {REG_UBRACES,   "REG_UBRACES"},
635
  {REG_UBSALNUM,    "REG_UBSALNUM"},
636
  {REG_UPBOTCH,   "REG_UPBOTCH"},
637
  {REG_UBBS,    "REG_UBBS"},
638
  {REG_UNONPOSIX,   "REG_UNONPOSIX"},
639
  {REG_UUNSPEC,   "REG_UUNSPEC"},
640
  {REG_UUNPORT,   "REG_UUNPORT"},
641
  {REG_ULOCALE,   "REG_ULOCALE"},
642
  {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
643
  {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
644
  {REG_USHORTEST,   "REG_USHORTEST"},
645
  {0,     ""}
646
    };
647
    struct infoname *inf;
648
    int n;
649
 
650
    Tcl_ResetResult(interp);
651
 
652
    sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
653
    Tcl_AppendElement(interp, buf);
654
 
655
    /*
656
     * Must count bits before generating list, because we must know
657
     * whether {} are needed before we start appending names.
658
     */
659
    n = 0;
660
    for (inf = infonames; inf->bit != 0; inf++) {
661
  if (regexpPtr->re.re_info&inf->bit) {
662
      n++;
663
  }
664
    }
665
    if (n != 1) {
666
  Tcl_AppendResult(interp, " {", NULL);
667
    }
668
    for (inf = infonames; inf->bit != 0; inf++) {
669
  if (regexpPtr->re.re_info&inf->bit) {
670
      Tcl_AppendElement(interp, inf->text);
671
  }
672
    }
673
    if (n != 1) {
674
  Tcl_AppendResult(interp, "}", NULL);
675
    }
676
 
677
    return 0;
678
}
679
 
680
/*
681
 *----------------------------------------------------------------------
682
 *
683
 * TclRegError --
684
 *
685
 *  Generate an error message based on the regexp status code.
686
 *
687
 * Results:
688
 *  Places an error in the interpreter.
689
 *
690
 * Side effects:
691
 *  Sets errorCode as well.
692
 *
693
 *----------------------------------------------------------------------
694
 */
695
 
696
void
697
TclRegError(interp, msg, status)
698
    Tcl_Interp *interp;   /* Interpreter for error reporting. */
699
    CONST char *msg;    /* Message to prepend to error. */
700
    int status;     /* Status code to report. */
701
{
702
    char buf[100];    /* ample in practice */
703
    char cbuf[100];   /* lots in practice */
704
    size_t n;
705
    char *p;
706
 
707
    Tcl_ResetResult(interp);
708
    n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
709
    p = (n > sizeof(buf)) ? "..." : "";
710
    Tcl_AppendResult(interp, msg, buf, p, NULL);
711
 
712
    sprintf(cbuf, "%d", status);
713
    (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
714
    Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
715
}
716
 
717
 
718
/*
719
 *----------------------------------------------------------------------
720
 *
721
 * FreeRegexpInternalRep --
722
 *
723
 *  Deallocate the storage associated with a regexp object's internal
724
 *  representation.
725
 *
726
 * Results:
727
 *  None.
728
 *
729
 * Side effects:
730
 *  Frees the compiled regular expression.
731
 *
732
 *----------------------------------------------------------------------
733
 */
734
 
735
static void
736
FreeRegexpInternalRep(objPtr)
737
    Tcl_Obj *objPtr;    /* Regexp object with internal rep to free. */
738
{
739
    TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
740
 
741
    /*
742
     * If this is the last reference to the regexp, free it.
743
     */
744
 
745
    if (--(regexpRepPtr->refCount) <= 0) {
746
  FreeRegexp(regexpRepPtr);
747
    }
748
}
749
 
750
/*
751
 *----------------------------------------------------------------------
752
 *
753
 * DupRegexpInternalRep --
754
 *
755
 *  We copy the reference to the compiled regexp and bump its
756
 *  reference count.
757
 *
758
 * Results:
759
 *  None.
760
 *
761
 * Side effects:
762
 *  Increments the reference count of the regexp.
763
 *
764
 *----------------------------------------------------------------------
765
 */
766
 
767
static void
768
DupRegexpInternalRep(srcPtr, copyPtr)
769
    Tcl_Obj *srcPtr;    /* Object with internal rep to copy. */
770
    Tcl_Obj *copyPtr;   /* Object with internal rep to set. */
771
{
772
    TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
773
    regexpPtr->refCount++;
774
    copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
775
    copyPtr->typePtr = &tclRegexpType;
776
}
777
 
778
/*
779
 *----------------------------------------------------------------------
780
 *
781
 * SetRegexpFromAny --
782
 *
783
 *  Attempt to generate a compiled regular expression for the Tcl object
784
 *  "objPtr".
785
 *
786
 * Results:
787
 *  The return value is TCL_OK or TCL_ERROR. If an error occurs during
788
 *  conversion, an error message is left in the interpreter's result
789
 *  unless "interp" is NULL.
790
 *
791
 * Side effects:
792
 *  If no error occurs, a regular expression is stored as "objPtr"s
793
 *  internal representation.
794
 *
795
 *----------------------------------------------------------------------
796
 */
797
 
798
static int
799
SetRegexpFromAny(interp, objPtr)
800
    Tcl_Interp *interp;   /* Used for error reporting if not NULL. */
801
    Tcl_Obj *objPtr;    /* The object to convert. */
802
{
803
    if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
804
  return TCL_ERROR;
805
    }
806
    return TCL_OK;
807
}
808
 
809
/*
810
 *---------------------------------------------------------------------------
811
 *
812
 * CompileRegexp --
813
 *
814
 *  Attempt to compile the given regexp pattern.  If the compiled
815
 *  regular expression can be found in the per-thread cache, it
816
 *  will be used instead of compiling a new copy.
817
 *
818
 * Results:
819
 *  The return value is a pointer to a newly allocated TclRegexp
820
 *  that represents the compiled pattern, or NULL if the pattern
821
 *  could not be compiled.  If NULL is returned, an error message is
822
 *  left in the interp's result.
823
 *
824
 * Side effects:
825
 *  The thread-local regexp cache is updated and a new TclRegexp may
826
 *  be allocated.
827
 *
828
 *----------------------------------------------------------------------
829
 */
830
 
831
static TclRegexp *
832
CompileRegexp(interp, string, length, flags)
833
    Tcl_Interp *interp;   /* Used for error reporting if not NULL. */
834
    CONST char *string;   /* The regexp to compile (UTF-8). */
835
    int length;     /* The length of the string in bytes. */
836
    int flags;      /* Compilation flags. */
837
{
838
    TclRegexp *regexpPtr;
839
    CONST Tcl_UniChar *uniString;
840
    int numChars;
841
    Tcl_DString stringBuf;
842
    int status, i;
843
    ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
844
 
845
    if (!tsdPtr->initialized) {
846
  tsdPtr->initialized = 1;
847
  Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
848
    }
849
 
850
    /*
851
     * This routine maintains a second-level regular expression cache in
852
     * addition to the per-object regexp cache.  The per-thread cache is needed
853
     * to handle the case where for various reasons the object is lost between
854
     * invocations of the regexp command, but the literal pattern is the same.
855
     */
856
 
857
    /*
858
     * Check the per-thread compiled regexp cache.  We can only reuse
859
     * a regexp if it has the same pattern and the same flags.
860
     */
861
 
862
    for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
863
  if ((length == tsdPtr->patLengths[i])
864
    && (tsdPtr->regexps[i]->flags == flags)
865
    && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
866
      /*
867
       * Move the matched pattern to the first slot in the
868
       * cache and shift the other patterns down one position.
869
       */
870
 
871
      if (i != 0) {
872
    int j;
873
    char *cachedString;
874
 
875
    cachedString = tsdPtr->patterns[i];
876
    regexpPtr = tsdPtr->regexps[i];
877
    for (j = i-1; j >= 0; j--) {
878
        tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
879
        tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
880
        tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
881
    }
882
    tsdPtr->patterns[0] = cachedString;
883
    tsdPtr->patLengths[0] = length;
884
    tsdPtr->regexps[0] = regexpPtr;
885
      }
886
      return tsdPtr->regexps[0];
887
  }
888
    }
889
 
890
    /*
891
     * This is a new expression, so compile it and add it to the cache.
892
     */
893
 
894
    regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
895
    regexpPtr->objPtr = NULL;
896
    regexpPtr->string = NULL;
897
    regexpPtr->details.rm_extend.rm_so = -1;
898
    regexpPtr->details.rm_extend.rm_eo = -1;
899
 
900
    /*
901
     * Get the up-to-date string representation and map to unicode.
902
     */
903
 
904
    Tcl_DStringInit(&stringBuf);
905
    uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
906
    numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
907
 
908
    /*
909
     * Compile the string and check for errors.
910
     */
911
 
912
    regexpPtr->flags = flags;
913
    status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
914
    Tcl_DStringFree(&stringBuf);
915
 
916
    if (status != REG_OKAY) {
917
  /*
918
   * Clean up and report errors in the interpreter, if possible.
919
   */
920
 
921
  ckfree((char *)regexpPtr);
922
  if (interp) {
923
      TclRegError(interp,
924
        "couldn't compile regular expression pattern: ",
925
        status);
926
  }
927
  return NULL;
928
    }
929
 
930
    /*
931
     * Allocate enough space for all of the subexpressions, plus one
932
     * extra for the entire pattern.
933
     */
934
 
935
    regexpPtr->matches = (regmatch_t *) ckalloc(
936
      sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
937
 
938
    /*
939
     * Initialize the refcount to one initially, since it is in the cache.
940
     */
941
 
942
    regexpPtr->refCount = 1;
943
 
944
    /*
945
     * Free the last regexp, if necessary, and make room at the head of the
946
     * list for the new regexp.
947
     */
948
 
949
    if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
950
  TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
951
  if (--(oldRegexpPtr->refCount) <= 0) {
952
      FreeRegexp(oldRegexpPtr);
953
  }
954
  ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
955
    }
956
    for (i = NUM_REGEXPS - 2; i >= 0; i--) {
957
  tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
958
  tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
959
  tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
960
    }
961
    tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
962
    strcpy(tsdPtr->patterns[0], string);
963
    tsdPtr->patLengths[0] = length;
964
    tsdPtr->regexps[0] = regexpPtr;
965
 
966
    return regexpPtr;
967
}
968
 
969
/*
970
 *----------------------------------------------------------------------
971
 *
972
 * FreeRegexp --
973
 *
974
 *  Release the storage associated with a TclRegexp.
975
 *
976
 * Results:
977
 *  None.
978
 *
979
 * Side effects:
980
 *  None.
981
 *
982
 *----------------------------------------------------------------------
983
 */
984
 
985
static void
986
FreeRegexp(regexpPtr)
987
    TclRegexp *regexpPtr; /* Compiled regular expression to free. */
988
{
989
    TclReFree(&regexpPtr->re);
990
    if (regexpPtr->matches) {
991
  ckfree((char *) regexpPtr->matches);
992
    }
993
    ckfree((char *) regexpPtr);
994
}
995
 
996
/*
997
 *----------------------------------------------------------------------
998
 *
999
 * FinalizeRegexp --
1000
 *
1001
 *  Release the storage associated with the per-thread regexp
1002
 *  cache.
1003
 *
1004
 * Results:
1005
 *  None.
1006
 *
1007
 * Side effects:
1008
 *  None.
1009
 *
1010
 *----------------------------------------------------------------------
1011
 */
1012
 
1013
static void
1014
FinalizeRegexp(clientData)
1015
    ClientData clientData;  /* Not used. */
1016
{
1017
    int i;
1018
    TclRegexp *regexpPtr;
1019
    ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1020
 
1021
    for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1022
  regexpPtr = tsdPtr->regexps[i];
1023
  if (--(regexpPtr->refCount) <= 0) {
1024
      FreeRegexp(regexpPtr);
1025
  }
1026
  ckfree(tsdPtr->patterns[i]);
1027
  tsdPtr->patterns[i] = NULL;
1028
    }
1029
    /*
1030
     * We may find ourselves reinitialized if another finalization routine
1031
     * invokes regexps.
1032
     */
1033
    tsdPtr->initialized = 0;
1034
}