Changeset 396
- Timestamp:
- 08/12/06 05:43:40 (2 years ago)
- Files:
-
- 1.7.6/CHANGES.176 (modified) (1 diff)
- 1.7.6/Configure (modified) (1 diff)
- 1.7.6/Patchlevel (modified) (1 diff)
- 1.7.6/game/txt/hlp/pennfunc.hlp (modified) (1 diff)
- 1.7.6/game/txt/hlp/pennv176.hlp (modified) (3 diffs)
- 1.7.6/game/txt/hlp/pennvOLD.hlp (modified) (1 diff)
- 1.7.6/hdrs/oldattrb.h (deleted)
- 1.7.6/hdrs/pcre.h (modified) (1 diff)
- 1.7.6/hdrs/version.h (modified) (1 diff)
- 1.7.6/hdrs/warnings.h (deleted)
- 1.7.6/po/Makefile (modified) (1 diff)
- 1.7.6/src/convdb.c (deleted)
- 1.7.6/src/csrimalloc.c (modified) (4 diffs)
- 1.7.6/src/pcre.c (modified) (175 diffs)
- 1.7.6/src/switchinc.c (modified) (1 diff)
- 1.7.6/utils/mkcmds.sh (deleted)
- 1.7.6/win32/cmds.h (modified) (1 diff)
- 1.7.6/win32/config.h (modified) (1 diff)
- 1.7.6/win32/funs.h (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
1.7.6/CHANGES.176
r394 r396 18 18 19 19 ========================================================================== 20 21 Version 1.7.6 patchlevel 16 April 28, 2004 22 23 Fixes: 24 * PCRE updated to 4.5 [SW] 25 20 26 21 27 Version 1.7.6 patchlevel 15 January 25, 2004 1.7.6/Configure
r382 r396 19 19 # 20 20 21 # $Id: Head.U 1. 1 Thu, 20 Jul 2000 17:42:54 -0500 dunemush $21 # $Id: Head.U 1.2 Fri, 15 Feb 2002 08:37:53 -0600 dunemush $ 22 22 # 23 # Generated on Mon Sep 30 23:26:16 CDT 2002[metaconfig 3.0 PL70]23 # Generated on Tue Apr 8 22:02:10 CDT 2003 [metaconfig 3.0 PL70] 24 24 25 25 cat >/tmp/c1$$ <<EOF 1.7.6/Patchlevel
r394 r396 1 1 Do not edit this file. It is maintained by the official PennMUSH patches. 2 This is PennMUSH 1.7.6p1 52 This is PennMUSH 1.7.6p16 1.7.6/game/txt/hlp/pennfunc.hlp
r386 r396 2558 2558 register. Under regmatchi, case of the substring may be modified. 2559 2559 2560 For example, i f <string> is 'cookies=30', and <regexp> is '(.+)=([0-9]*)'2561 ( parsed; note that escaping may be necessary), then the 0th substring2560 For example, in regmatch( cookies=30 , (.+)=(\[0-9\]*) ) 2561 (note use of escaping for MUSH parser), then the 0th substring 2562 2562 matched is 'cookies=30', the 1st substring is 'cookies', and the 2nd 2563 2563 substring is '30'. If <register list> is '0 3 5', then %q0 will become 1.7.6/game/txt/hlp/pennv176.hlp
r394 r396 1 & 1.7.6p1 51 & 1.7.6p16 2 2 & changes 3 3 This is a list of changes in this patchlevel which are probably of … … 12 12 be read in 'help patchlevels'. 13 13 14 Version 1.7.6 patchlevel 16 April 28, 2004 15 16 Fixes: 17 * PCRE updated to 4.5 [SW] 18 19 20 & 1.7.6p15 14 21 Version 1.7.6 patchlevel 15 January 25, 2004 15 22 … … 49 56 50 57 Minor changes: 51 * Users no longer see last connection information when they 58 * Users no longer see last connection information when they 52 59 connect to Guests. Suggested by Jules@M*U*S*H. 53 60 Fixes: 54 * Potential problem with ambigious names in the information functions 61 * Potential problem with ambigious names in the information functions 55 62 fixed. [SW] 56 63 * The 'chat' config group is no longer displayed when CHAT_SYSTEM 1.7.6/game/txt/hlp/pennvOLD.hlp
r394 r396 4418 4418 type 'help <version>p<patchlevel>'. For example, 'help 1.7.2p3' 4419 4419 4420 1.7.6: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 4420 1.7.6: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 4421 4421 1.7.5: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 4422 4422 1.7.4: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1.7.6/hdrs/pcre.h
r384 r396 45 45 #define PCRE_UTF8 0x0800 46 46 #define PCRE_NO_AUTO_CAPTURE 0x1000 47 #define PCRE_NO_UTF8_CHECK 0x2000 47 48 48 49 /* Exec-time and get/set-time error codes */ 1.7.6/hdrs/version.h
r394 r396 1 #define VERSION "PennMUSH version 1.7.6 patchlevel 1 5 [01/25/2004]"2 #define SHORTVN "PennMUSH 1.7.6p1 5"1 #define VERSION "PennMUSH version 1.7.6 patchlevel 16 [04/28/2004]" 2 #define SHORTVN "PennMUSH 1.7.6p16" 1.7.6/po/Makefile
r390 r396 2 2 3 3 POFILES=ru_RU.po nl_NL.po sv_SE.po hu_HU.po es_ES.po pt_BR.po fr_FR.po \ 4 da_DK.po de_DE.po no_NO.po pl_PL.po ro_RO.po 4 da_DK.po de_DE.po no_NO.po pl_PL.po ro_RO.po id_ID.po 5 5 6 6 .pox.po: $*.pox pennmush.pot 1.7.6/src/csrimalloc.c
r353 r396 202 202 #endif /* EXTERNS_H__ */ /* Do not add anything after this line */ 203 203 204 /* $Id $ */204 /* $Id: csrimalloc.c 1.23.1.3 Sat, 20 Jul 2002 12:00:21 -0500 dunemush $ */ 205 205 #ifndef __ASSERT_H__ 206 206 #define __ASSERT_H__ … … 234 234 #endif 235 235 236 /* $Id $ */236 /* $Id: csrimalloc.c 1.23.1.3 Sat, 20 Jul 2002 12:00:21 -0500 dunemush $ */ 237 237 #ifndef __ALIGN_H__ 238 238 #define __ALIGN_H__ … … 685 685 * provide an alternative short name in globrename.h 686 686 */ 687 /* $Id $ */687 /* $Id: csrimalloc.c 1.23.1.3 Sat, 20 Jul 2002 12:00:21 -0500 dunemush $ */ 688 688 #ifndef __GLOBALRENAME_H__ 689 689 #define __GLOBALRENAME_H__ … … 801 801 802 802 univptr_t (*_malloc_memfunc) proto((Size_t)) = _mal_sbrk; 803 /* $Id $ */803 /* $Id: csrimalloc.c 1.23.1.3 Sat, 20 Jul 2002 12:00:21 -0500 dunemush $ */ 804 804 #ifndef __GLOBALS_H__ 805 805 #define __GLOBALS_H__ 1.7.6/src/pcre.c
r384 r396 43 43 #include <stdlib.h> 44 44 #include <stddef.h> 45 #include <setjmp.h> 45 46 #include "pcre.h" 46 47 #include "confmagic.h" 47 48 48 /* Bits of PCRE's config.h */ 49 #define LINK_SIZE 2 49 50 /* Bits of PCRE's conf.h */ 51 #define NEWLINE '\n' 52 #define LINK_SIZE 2 50 53 #define MATCH_LIMIT 100000 51 #define N EWLINE '\n'54 #define NO_RECURSE 52 55 53 56 /* Bits of internal.h */ … … 55 58 modules, but which are not relevant to the outside. */ 56 59 60 #define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ 61 #define EXPORT 57 62 58 63 /* PCRE keeps offsets in its compiled code as 2-byte quantities by default. … … 121 126 (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ 122 127 PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ 123 PCRE_NO_AUTO_CAPTURE )128 PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK) 124 129 125 130 #define PUBLIC_EXEC_OPTIONS \ 126 (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY )131 (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK) 127 132 128 133 #define PUBLIC_STUDY_OPTIONS 0 /* None defined */ … … 170 175 #endif 171 176 172 #ifndef ESC_t 173 #define ESC_t '\t' 177 /* We can't officially use ESC_t because it is a POSIX reserved identifier 178 (presumably because of all the others like size_t). */ 179 180 #ifndef ESC_tee 181 #define ESC_tee '\t' 174 182 #endif 175 183 … … 276 284 character > 255 is encountered. */ 277 285 278 OP_XCLASS, /* 5 6Extended class for handling UTF-8 chars within the286 OP_XCLASS, /* 57 Extended class for handling UTF-8 chars within the 279 287 class. This does both positive and negative. */ 280 288 281 OP_REF, /* 5 7Match a back reference */282 OP_RECURSE, /* 5 8Match a numbered subpattern (possibly recursive) */283 OP_CALLOUT, /* 59Call out to external function if provided */284 285 OP_ALT, /* 6 0Start of alternation */286 OP_KET, /* 6 1End of group that doesn't have an unbounded repeat */287 OP_KETRMAX, /* 6 2These two must remain together and in this */288 OP_KETRMIN, /* 6 3order. They are for groups the repeat for ever. */289 OP_REF, /* 58 Match a back reference */ 290 OP_RECURSE, /* 59 Match a numbered subpattern (possibly recursive) */ 291 OP_CALLOUT, /* 60 Call out to external function if provided */ 292 293 OP_ALT, /* 61 Start of alternation */ 294 OP_KET, /* 62 End of group that doesn't have an unbounded repeat */ 295 OP_KETRMAX, /* 63 These two must remain together and in this */ 296 OP_KETRMIN, /* 64 order. They are for groups the repeat for ever. */ 289 297 290 298 /* The assertions must come before ONCE and COND */ 291 299 292 OP_ASSERT, /* 6 4Positive lookahead */293 OP_ASSERT_NOT, /* 6 5Negative lookahead */294 OP_ASSERTBACK, /* 6 6Positive lookbehind */295 OP_ASSERTBACK_NOT, /* 6 7Negative lookbehind */296 OP_REVERSE, /* 6 8Move pointer back - used in lookbehind assertions */300 OP_ASSERT, /* 65 Positive lookahead */ 301 OP_ASSERT_NOT, /* 66 Negative lookahead */ 302 OP_ASSERTBACK, /* 67 Positive lookbehind */ 303 OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */ 304 OP_REVERSE, /* 69 Move pointer back - used in lookbehind assertions */ 297 305 298 306 /* ONCE and COND must come after the assertions, with ONCE first, as there's 299 307 a test for >= ONCE for a subpattern that isn't an assertion. */ 300 308 301 OP_ONCE, /* 69Once matched, don't back up into the subpattern */302 OP_COND, /* 7 0Conditional group */303 OP_CREF, /* 7 1Used to hold an extraction string number (cond ref) */304 305 OP_BRAZERO, /* 7 2These two must remain together and in this */306 OP_BRAMINZERO, /* 7 3order. */307 308 OP_BRANUMBER, /* 7 4Used for extracting brackets whose number is greater309 OP_ONCE, /* 70 Once matched, don't back up into the subpattern */ 310 OP_COND, /* 71 Conditional group */ 311 OP_CREF, /* 72 Used to hold an extraction string number (cond ref) */ 312 313 OP_BRAZERO, /* 73 These two must remain together and in this */ 314 OP_BRAMINZERO, /* 74 order. */ 315 316 OP_BRANUMBER, /* 75 Used for extracting brackets whose number is greater 309 317 than can fit into an opcode. */ 310 318 311 OP_BRA /* 7 5This and greater values are used for brackets that319 OP_BRA /* 76 This and greater values are used for brackets that 312 320 extract substrings up to a basic limit. After that, 313 321 use is made of OP_BRANUMBER. */ … … 352 360 2, /* Chars - the minimum length */ \ 353 361 2, /* not */ \ 354 /* Positive single-char repeats */ \355 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** These are*/ \356 4, 4, 4, /* upto, minupto, exact ** minima*/ \357 /* Negative single-char repeats */ \362 /* Positive single-char repeats ** These are */ \ 363 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ 364 4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \ 365 /* Negative single-char repeats - only for chars < 256 */ \ 358 366 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ 359 367 4, 4, 4, /* NOT upto, minupto, exact */ \ … … 447 455 #define ERR42 "syntax error after (?P" 448 456 #define ERR43 "two named groups have the same name" 457 #define ERR44 "invalid UTF-8 string" 449 458 450 459 /* All character handling must be done as unsigned characters. Otherwise there … … 510 519 511 520 typedef struct recursion_info { 512 struct recursion_info *prev ; /* Previous recursion record (or NULL) */521 struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ 513 522 int group_num; /* Number of group that was called */ 514 523 const uschar *after_call; /* "Return value": points after the call in the expr */ … … 517 526 int saved_max; /* Number of saved offsets */ 518 527 } recursion_info; 528 529 /* When compiling in a mode that doesn't use recursive calls to match(), 530 a structure is used to remember local variables on the heap. It is defined in 531 pcre.c, close to the match() function, so that it is easy to keep it in step 532 with any changes of local variable. However, the pointer to the current frame 533 must be saved in some "static" place over a longjmp(). We declare the 534 structure here so that we can put a pointer in the match_data structure. 535 NOTE: This isn't used for a "normal" compilation of pcre. */ 536 537 struct heapframe; 519 538 520 539 /* Structure for passing "static" information around between the functions … … 545 564 recursion_info *recursive; /* Linked list of recursion data */ 546 565 void *callout_data; /* To pass back to callouts */ 566 struct heapframe *thisframe; /* Used only when compiling for no recursion */ 547 567 } match_data; 548 568 … … 767 787 /* End of chartables.c */ 768 788 /* get.c */ 789 /* This module contains some convenience functions for extracting substrings 790 from the subject string after a regex match has succeeded. The original idea 791 for these functions came from Scott Wimer <scottw@cgibuilder.com>. */ 792 793 769 794 /************************************************* 770 795 * Copy captured string to given buffer * … … 810 835 } 811 836 837 838 812 839 /* End of get.c */ 813 840 /* maketables.c */ … … 819 846 a pointer to them. They are build using the ctype functions, and consequently 820 847 their contents will depend upon the current locale setting. When compiled as 821 part of the library, the store is obtained via malloc(), but when compiled848 part of the library, the store is obtained via pcre_malloc(), but when compiled 822 849 inside dftables, use malloc(). 823 850 … … 832 859 int i; 833 860 861 #ifndef DFTABLES 834 862 yield = (unsigned char *) malloc(tables_length); 863 #else 864 yield = (unsigned char *) malloc(tables_length); 865 #endif 835 866 836 867 if (yield == NULL) … … 900 931 if (isalnum(i) || i == '_') 901 932 x += ctype_word; 933 934 /* Note: strchr includes the terminating zero in the characters it considers. 935 In this instance, that is ok because we want binary zero to be flagged as a 936 meta-character, which in this sense is any character that terminates a run 937 of data characters. */ 938 902 939 if (strchr("*+?{^.$|()[", i) != 0) 903 940 x += ctype_meta; … … 910 947 /* End of maketables.c */ 911 948 /* study.c */ 912 913 949 /************************************************* 914 950 * Set a bit and maybe its alternate case * … … 1125 1161 case OP_TYPEMINQUERY: 1126 1162 switch (tcode[1]) { 1163 case OP_ANY: 1164 return FALSE; 1165 1127 1166 case OP_NOT_DIGIT: 1128 1167 for (c = 0; c < 32; c++) … … 1162 1201 bits and either carry on or not, according to the repeat count. If it was 1163 1202 a negative class, and we are operating with UTF-8 characters, any byte 1164 with the top-bit set is a potentially valid starter because it may start 1165 a character with a value > 255. (This is sub-optimal in that the 1166 character may be in the range 128-255, and those characters might be 1167 unwanted, but that's as far as we go for the moment.) */ 1203 with a value >= 0xc4 is a potentially valid starter because it starts a 1204 character with a value > 255. */ 1168 1205 1169 1206 case OP_NCLASS: 1170 if (utf8) 1171 memset(start_bits + 16, 0xff, 16); 1207 if (utf8) { 1208 start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ 1209 memset(start_bits + 25, 0xff, 7); /* Bits for 0xc9 - 0xff */ 1210 } 1172 1211 /* Fall through */ 1173 1212 … … 1175 1214 { 1176 1215 tcode++; 1177 for (c = 0; c < 32; c++) 1178 start_bits[c] |= tcode[c]; 1216 1217 /* In UTF-8 mode, the bits in a bit map correspond to character 1218 values, not to byte values. However, the bit map we are constructing is 1219 for byte values. So we have to do a conversion for characters whose 1220 value is > 127. In fact, there are only two possible starting bytes for 1221 characters in the range 128 - 255. */ 1222 1223 if (utf8) { 1224 for (c = 0; c < 16; c++) 1225 start_bits[c] |= tcode[c]; 1226 for (c = 128; c < 256; c++) { 1227 if ((tcode[c / 8] && (1 << (c & 7))) != 0) { 1228 int d = (c >> 6) | 0xc0; /* Set bit for this starter */ 1229 start_bits[d / 8] |= (1 << (d & 7)); /* and then skip on to the */ 1230 c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ 1231 } 1232 } 1233 } 1234 1235 /* In non-UTF-8 mode, the two bit maps are completely compatible. */ 1236 1237 else { 1238 for (c = 0; c < 32; c++) 1239 start_bits[c] |= tcode[c]; 1240 } 1241 1242 /* Advance past the bit map, and act on what follows */ 1243 1179 1244 tcode += 32; 1180 1245 switch (*tcode) { … … 1231 1296 */ 1232 1297 1233 pcre_extra *1298 EXPORT pcre_extra * 1234 1299 pcre_study(const pcre * external_re, int options, const char **errorptr) 1235 1300 { … … 1282 1347 don't have to change that code. */ 1283 1348 1284 extra = (pcre_extra *) (malloc) 1285 (sizeof(pcre_extra) + sizeof(pcre_study_data)); 1349 extra = (pcre_extra *) malloc(sizeof(pcre_extra) + sizeof(pcre_study_data)); 1286 1350 1287 1351 if (extra == NULL) { … … 1338 1402 the definition is next to the definition of the opcodes in internal.h. */ 1339 1403 1340 static uschar OP_lengths[] = { OP_LENGTHS };1404 static const uschar OP_lengths[] = { OP_LENGTHS }; 1341 1405 1342 1406 /* Min and max values for the common repeats; for the maxima, 0 => infinity */ … … 1359 1423 '`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */ 1360 1424 0, 0, 0, 0, 0, 0, ESC_n, 0, /* h - o */ 1361 0, 0, ESC_r, -ESC_s, ESC_t , 0, 0, -ESC_w, /* p - w */1425 0, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* p - w */ 1362 1426 0, 0, -ESC_z /* x - z */ 1363 1427 }; 1428 1429 1364 1430 1365 1431 /* Tables of names of POSIX character classes and their lengths. The list is … … 1367 1433 as this is assumed for handling case independence. */ 1368 1434 1369 static const char * posix_names[] = {1435 static const char *const posix_names[] = { 1370 1436 "alpha", "lower", "upper", 1371 1437 "alnum", "ascii", "blank", "cntrl", "digit", "graph", … … 1398 1464 }; 1399 1465 1466 /* Table to identify digits and hex digits. This is used when compiling 1467 patterns. Note that the tables in chartables are dependent on the locale, and 1468 may mark arbitrary characters as digits - but the PCRE compiling code expects 1469 to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have 1470 a private table here. It costs 256 bytes, but it is a lot faster than doing 1471 character value tests (at least in some simple cases I timed), and in some 1472 applications one wants PCRE to compile efficiently as well as match 1473 efficiently. 1474 1475 For convenience, we use the same bit definitions as in chartables: 1476 1477 0x04 decimal digit 1478 0x08 hexadecimal digit 1479 1480 Then we can use ctype_digit and ctype_xdigit in the code. */ 1481 1482 static const unsigned char digitab[] = { 1483 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0- 7 */ 1484 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8- 15 */ 1485 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16- 23 */ 1486 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24- 31 */ 1487 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* - ' */ 1488 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ( - / */ 1489 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, /* 0 - 7 */ 1490 0x0c, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 - ? */ 1491 0x00, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, /* @ - G */ 1492 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* H - O */ 1493 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* P - W */ 1494 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* X - _ */ 1495 0x00, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x00, /* ` - g */ 1496 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* h - o */ 1497 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* p - w */ 1498 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* x -127 */ 1499 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 128-135 */ 1500 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 136-143 */ 1501 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 144-151 */ 1502 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 152-159 */ 1503 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 160-167 */ 1504 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 168-175 */ 1505 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 176-183 */ 1506 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 184-191 */ 1507 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 192-199 */ 1508 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 200-207 */ 1509 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 208-215 */ 1510 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 216-223 */ 1511 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 224-231 */ 1512 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 232-239 */ 1513 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 240-247 */ 1514 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1515 }; /* 248-255 */ 1516 1517 1400 1518 1401 1519 /* Definition to allow mutual recursion */ … … 1408 1526 stack, for holding the values of the subject pointer at the start of each 1409 1527 subpattern, so as to detect when an empty string has been matched by a 1410 subpattern - to break infinite loops. */ 1528 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks 1529 are on the heap, not on the stack. */ 1411 1530 1412 1531 typedef struct eptrblock { 1413 struct eptrblock * prev;1414 const uschar * saved_eptr;1532 struct eptrblock *epb_prev; 1533 const uschar *epb_saved_eptr; 1415 1534 } eptrblock; 1416 1535 … … 1433 1552 1434 1553 /* PCRE is thread-clean and doesn't use any global variables in the normal 1435 sense. However, it calls memory allocation and free functions via the two1554 sense. However, it calls memory allocation and free functions via the four 1436 1555 indirections below, and it can optionally do callouts. These values can be 1437 1556 changed by the caller, but are shared between all threads. However, when 1438 1557 compiling for Virtual Pascal, things are done differently (see pcre.in). */ 1439 1558 1559 #ifndef VPCOMPAT 1560 #ifdef __cplusplus 1561 extern "C" void *(*pcre_malloc) (size_t) = malloc; 1562 extern "C" void (*pcre_free) (void *) = free; 1563 extern "C" void *(*pcre_stack_malloc) (size_t) = malloc; 1564 extern "C" void (*pcre_stack_free) (void *) = free; 1565 extern "C" int (*pcre_callout) (pcre_callout_block *) = NULL; 1566 #else 1567 void *(*pcre_malloc) (size_t) = malloc; 1568 void (*pcre_free) (void *) = free; 1569 void *(*pcre_stack_malloc) (size_t) = malloc; 1570 void (*pcre_stack_free) (void *) = free; 1440 1571 int (*pcre_callout) (pcre_callout_block *) = NULL; 1572 #endif 1573 #endif 1441 1574 1442 1575 … … 1444 1577 * Macros and tables for character handling * 1445 1578 *************************************************/ 1579 1580 /* When UTF-8 encoding is being used, a character is no longer just a single 1581 byte. The macros for character handling generate simple sequences when used in 1582 byte-mode, and more complicated ones for UTF-8 characters. */ 1446 1583 1447 1584 #define GETCHAR(c, eptr) c = *eptr; … … 1450 1587 #define GETCHARLEN(c, eptr, len) c = *eptr; 1451 1588 #define BACKCHAR(eptr) 1589 1452 1590 1453 1591 /************************************************* … … 1467 1605 options the options bits 1468 1606 isclass TRUE if inside a character class 1469 cd pointer to char tables block1470 1607 1471 1608 Returns: zero or positive => a data character … … 1476 1613 static int 1477 1614 check_escape(const uschar ** ptrptr, const char **errorptr, int bracount, 1478 int options, BOOL isclass , compile_data * cd)1615 int options, BOOL isclass) 1479 1616 { 1480 1617 const uschar *ptr = *ptrptr; … … 1487 1624 *errorptr = ERR1; 1488 1625 1489 /* Digits or letters may have special meaning; all others are literals. */ 1626 /* Non-alphamerics are literals. For digits or letters, do an initial lookup in 1627 a table. A non-zero result is something that can be returned immediately. 1628 Otherwise further processing may be required. */ 1490 1629 1491 1630 else if (c < '0' || c > 'z') { 1492 } 1493 1494 /* Do an initial lookup in a table. A non-zero result is something that can be 1495 returned immediately. Otherwise further processing may be required. */ 1496 1631 } /* Not alphameric */ 1497 1632 else if ((i = escapes[c - '0']) != 0) 1498 1633 c = i; 1634 1499 1635 1500 1636 /* Escapes that need further processing, or are illegal. */ … … 1542 1678 oldptr = ptr; 1543 1679 c -= '0'; 1544 while (( cd->ctypes[ptr[1]] & ctype_digit) != 0)1680 while ((digitab[ptr[1]] & ctype_digit) != 0) 1545 1681 c = c * 10 + *(++ptr) - '0'; 1546 1682 if (c < 10 || c <= bracount) { … … 1566 1702 case '0': 1567 1703 c -= '0'; 1568 while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && 1569 ptr[1] != '8' && ptr[1] != '9') 1704 while (i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7') 1570 1705 c = c * 8 + *(++ptr) - '0'; 1571 1706 c &= 255; /* Take least significant 8 bits */ … … 1580 1715 1581 1716 c = 0; 1582 while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) { 1583 ptr++; 1584 c = c * 16 + cd->lcc[*ptr] - 1585 (((cd->ctypes[*ptr] & ctype_digit) != 0) ? '0' : 'W'); 1717 while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0) { 1718 int cc; /* Some compilers don't like ++ */ 1719 cc = *(++ptr); /* in initializers */ 1720 if (cc >= 'a') 1721 cc -= 32; /* Convert to upper case */ 1722 c = c * 16 + cc - ((cc < 'A') ? '0' : ('A' - 10)); 1586 1723 } 1587 1724 break; … … 1596 1733 } 1597 1734 1598 /* A letter is upper-cased; then the 0x40 bit is flipped */ 1735 /* A letter is upper-cased; then the 0x40 bit is flipped. This coding 1736 is ASCII-specific, but then the whole concept of \cx is ASCII-specific. 1737 (However, an EBCDIC equivalent has now been added.) */ 1599 1738 1600 1739 if (c >= 'a' && c <= 'z') 1601 c = cd->fcc[c];1740 c -= 32; 1602 1741 c ^= 0x40; 1603 1742 break; … … 1637 1776 Arguments: 1638 1777 p pointer to the first char after '{' 1639 cd pointer to char tables block1640 1778 1641 1779 Returns: TRUE or FALSE … … 1643 1781 1644 1782 static BOOL 1645 is_counted_repeat(const uschar * p , compile_data * cd)1783 is_counted_repeat(const uschar * p) 1646 1784 { 1647 if (( cd->ctypes[*p++] & ctype_digit) == 0)1785 if ((digitab[*p++] & ctype_digit) == 0) 1648 1786 return FALSE; 1649 while (( cd->ctypes[*p] & ctype_digit) != 0)1787 while ((digitab[*p] & ctype_digit) != 0) 1650 1788 p++; 1651 1789 if (*p == '}') … … 1657 1795 return TRUE; 1658 1796 1659 if (( cd->ctypes[*p++] & ctype_digit) == 0)1797 if ((digitab[*p++] & ctype_digit) == 0) 1660 1798 return FALSE; 1661 while (( cd->ctypes[*p] & ctype_digit) != 0)1799 while ((digitab[*p] & ctype_digit) != 0) 1662 1800 p++; 1801 1663 1802 return (*p == '}'); 1664 1803 } … … 1680 1819 returned as -1 if no max 1681 1820 errorptr points to pointer to error message 1682 cd pointer to character tables clock1683 1821 1684 1822 Returns: pointer to '}' on success; … … 1688 1826 static const uschar * 1689 1827 read_repeat_counts(const uschar * p, int *minp, int *maxp, 1690 const char **errorptr , compile_data * cd)1828 const char **errorptr) 1691 1829 { 1692 1830 int min = 0; 1693 1831 int max = -1; 1694 1832 1695 while (( cd->ctypes[*p] & ctype_digit) != 0)1833 while ((digitab[*p] & ctype_digit) != 0) 1696 1834 min = min * 10 + *p++ - '0'; 1697 1835 … … 1701 1839 if (*(++p) != '}') { 1702 1840 max = 0; 1703 while (( cd->ctypes[*p] & ctype_digit) != 0)1841 while ((digitab[*p] & ctype_digit) != 0) 1704 1842 max = max * 10 + *p++ - '0'; 1705 1843 if (max < min) { … … 1969 2107 1970 2108 static const uschar * 1971 find_bracket(const uschar * code, int number)2109 find_bracket(const uschar * code, BOOL utf8, int number) 1972 2110 { 2111 utf8 = utf8; /* Stop pedantic compilers complaining */ 1973 2112 1974 2113 for (;;) { … … 1988 2127 code += OP_lengths[c]; 1989 2128 1990 /* In UTF-8 mode, opcodes that are followed by a character may be followed 1991 by a multi-byte character. The length in the table is a minimum, so we have 1992 to scan along to skip the extra characters. All opcodes are less than 128, 1993 so we can use relatively efficient code. */ 2129 } 2130 } 2131 } 2132 2133 2134 2135 /************************************************* 2136 * Scan compiled regex for recursion reference * 2137 *************************************************/ 2138 2139 /* This little function scans through a compiled pattern until it finds an 2140 instance of OP_RECURSE. 2141 2142 Arguments: 2143 code points to start of expression 2144 utf8 TRUE in UTF-8 mode 2145 2146 Returns: pointer to the opcode for OP_RECURSE, or NULL if not found 2147 */ 2148 2149 static const uschar * 2150 find_recurse(const uschar * code, BOOL utf8) 2151 { 2152 utf8 = utf8; /* Stop pedantic compilers complaining */ 2153 2154 for (;;) { 2155 register int c = *code; 2156 if (c == OP_END) 2157 return NULL; 2158 else if (c == OP_RECURSE) 2159 return code; 2160 else if (c == OP_CHARS) 2161 code += code[1] + OP_lengths[c]; 2162 else if (c > OP_BRA) { 2163 code += OP_lengths[OP_BRA]; 2164 } else { 2165 code += OP_lengths[c]; 1994 2166 1995 2167 } … … 2052 2224 /* Check for quantifiers after a class */ 2053 2225 2054 2055 2226 case OP_CLASS: 2056 2227 case OP_NCLASS: 2057 2228 ccode = code + 33; 2058 2059 2229 2060 2230 switch (*ccode) { … … 2109 2279 return TRUE; 2110 2280 2281 /* In UTF-8 mode, STAR, MINSTAR, QUERY, MINQUERY, UPTO, and MINUPTO may be 2282 followed by a multibyte character */ 2283 2111 2284 } 2112 2285 } … … 2213 2386 } 2214 2387 2388 2389 /************************************************* 2390 * Adjust OP_RECURSE items in repeated group * 2391 *************************************************/ 2392 2393 /* OP_RECURSE items contain an offset from the start of the regex to the group 2394 that is referenced. This means that groups can be replicated for fixed 2395 repetition simply by copying (because the recursion is allowed to refer to 2396 earlier groups that are outside the current group). However, when a group is 2397 optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before 2398 it, after it has been compiled. This means that any OP_RECURSE items within it 2399 that refer to the group itself or any contained groups have to have their 2400 offsets adjusted. That is the job of this function. Before it is called, the 2401 partially compiled regex must be temporarily terminated with OP_END. 2402 2403 Arguments: 2404 group points to the start of the group 2405 adjust the amount by which the group is to be moved 2406 utf8 TRUE in UTF-8 mode 2407 cd contains pointers to tables etc. 2408 2409 Returns: nothing 2410 */ 2411 2412 static void 2413 adjust_recurse(uschar * group, int adjust, BOOL utf8, compile_data * cd) 2414 { 2415 uschar *ptr = group; 2416 while ((ptr = (uschar *) find_recurse(ptr, utf8)) != NULL) { 2417 int offset = GET(ptr, 1); 2418 if (cd->start_code + offset >= group) 2419 PUT(ptr, 1, offset + adjust); 2420 ptr += 1 + LINK_SIZE; 2421 } 2422 } 2215 2423 2216 2424 … … 2471 2679 posix_class *= 3; 2472 2680 for (i = 0; i < 3; i++) { 2473 BOOL isblank= strncmp((char *) ptr, "blank", 5) == 0;2681 BOOL blankclass = strncmp((char *) ptr, "blank", 5) == 0; 2474 2682 int taboffset = posix_class_maps[posix_class + i]; 2475 2683 if (taboffset < 0) … … 2478 2686 for (c = 0; c < 32; c++) 2479 2687 class[c] |= ~cbits[c + taboffset]; 2480 if ( isblank)2688 if (blankclass) 2481 2689 class[1] |= 0x3c; 2482 2690 } else { 2483 2691 for (c = 0; c < 32; c++) 2484 2692 class[c] |= cbits[c + taboffset]; 2485 if ( isblank)2693 if (blankclass) 2486 2694 class[1] &= ~0x3c; 2487 2695 } … … 2502 2710 2503 2711 if (c == '\\') { 2504 c = check_escape(&ptr, errorptr, *brackets, options, TRUE , cd);2712 c = check_escape(&ptr, errorptr, *brackets, options, TRUE); 2505 2713 if (-c == ESC_b) 2506 2714 c = '\b'; /* \b is backslash in a class */ … … 2585 2793 if (d == '\\') { 2586 2794 const uschar *oldptr = ptr; 2587 d = check_escape(&ptr, errorptr, *brackets, options, TRUE , cd);2795 d = check_escape(&ptr, errorptr, *brackets, options, TRUE); 2588 2796 2589 2797 /* \b is backslash; any other special means the '-' was literal */ … … 2632 2840 2633 2841 LONE_SINGLE_CHARACTER: 2842 2843 /* Handle a multibyte character */ 2634 2844 2635 2845 /* Handle a single-byte character */ … … 2717 2927 2718 2928 case '{': 2719 if (!is_counted_repeat(ptr + 1 , cd))2929 if (!is_counted_repeat(ptr + 1)) 2720 2930 goto NORMAL_CHAR; 2721 ptr = read_repeat_counts(ptr + 1, &repeat_min, &repeat_max, errorptr , cd);2931 ptr = read_repeat_counts(ptr + 1, &repeat_min, &repeat_max, errorptr); 2722 2932 if (*errorptr != NULL) 2723 2933 goto FAILED; … … 2998 3208 2999 3209 /* If the maximum is 1 or unlimited, we just have to stick in the 3000 BRAZERO and do no more at this point. */ 3210 BRAZERO and do no more at this point. However, we do need to adjust 3211 any OP_RECURSE calls inside the group that refer to the group itself or 3212 any internal group, because the offset is from the start of the whole 3213 regex. Temporarily terminate the pattern while doing this. */ 3001 3214 3002 3215 if (repeat_max <= 1) { 3216 *code = OP_END; 3217 adjust_recurse(previous, 1, utf8, cd); 3003 3218 memmove(previous + 1, previous, len); 3004 3219 code++; … … 3010 3225 The first one has to be handled carefully because it's the original 3011 3226 copy, which has to be moved up. The remainder can be handled by code 3012 that is common with the non-zero minimum case below. We just have to 3013 adjust the value or repeat_max, since one less copy is required. */ 3227 that is common with the non-zero minimum case below. We have to 3228 adjust the value or repeat_max, since one less copy is required. Once 3229 again, we may have to adjust any OP_RECURSE calls inside the group. */ 3014 3230 3015 3231 else { 3016 3232 int offset; 3233 *code = OP_END; 3234 adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd); 3017 3235 memmove(previous + 2 + LINK_SIZE, previous, len); 3018 3236 code += 2 + LINK_SIZE; … … 3171 3389 } 3172 3390 3173 /* Condition to test for a numbered subpattern match */ 3174 3175 else if ((cd->ctypes[ptr[1]] & ctype_digit) != 0) { 3391 /* Condition to test for a numbered subpattern match. We know that 3392 if a digit follows ( then there will just be digits until ) because 3393 the syntax was checked in the first pass. */ 3394 3395 else if ((digitab[ptr[1]] && ctype_digit) != 0) { 3176 3396 int condref; /* Don't amalgamate; some compilers */ 3177 3397 condref = *(++ptr) - '0'; /* grumble at autoincrement in declaration */ … … 3224 3444 { 3225 3445 int n = 0; 3226 while (( cd->ctypes[*(++ptr)] & ctype_digit) != 0)3446 while ((digitab[*(++ptr)] & ctype_digit) != 0) 3227 3447 n = n * 10 + *ptr - '0'; 3228 3448 if (n > 255) { … … 3327 3547 const uschar *called; 3328 3548 recno = 0; 3329 3330 while ((cd->ctypes[*ptr] & ctype_digit) != 0) 3549 while ((digitab[*ptr] & ctype_digit) != 0) 3331 3550 recno = recno * 10 + *ptr++ - '0'; 3332 3551 … … 3342 3561 *code = OP_END; 3343 3562 called = (recno == 0) ? 3344 cd->start_code : find_bracket(cd->start_code, recno);3563 cd->start_code : find_bracket(cd->start_code, utf8, recno); 3345 3564 3346 3565 if (called == NULL) { … … 3589 3808 case '\\': 3590 3809 tempptr = ptr; 3591 c = check_escape(&ptr, errorptr, *brackets, options, FALSE , cd);3810 c = check_escape(&ptr, errorptr, *brackets, options, FALSE); 3592 3811 3593 3812 /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values … … 3683 3902
