langregexp.c

Go to the documentation of this file.
00001 
00002 /*  $Id: langregexp.c 1260 2006-04-13 06:13:10Z sethdill $    */
00003 
00004 /******************************************************************************
00005 
00006     UserLand Frontier(tm) -- High performance Web content management,
00007     object database, system-level and Internet scripting environment,
00008     including source code editing and debugging.
00009 
00010     Copyright (C) 1992-2004 UserLand Software, Inc.
00011 
00012     This program is free software; you can redistribute it and/or modify
00013     it under the terms of the GNU General Public License as published by
00014     the Free Software Foundation; either version 2 of the License, or
00015     (at your option) any later version.
00016 
00017     This program is distributed in the hope that it will be useful,
00018     but WITHOUT ANY WARRANTY; without even the implied warranty of
00019     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020     GNU General Public License for more details.
00021 
00022     You should have received a copy of the GNU General Public License
00023     along with this program; if not, write to the Free Software
00024     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00025 
00026 ******************************************************************************/
00027 
00028 #include "frontier.h"
00029 #include "standard.h"
00030 
00031 #ifdef flregexpverbs
00032 
00033 #include "error.h"
00034 #include "memory.h"
00035 #include "ops.h"
00036 #include "resources.h"
00037 #include "strings.h"
00038 #include "lang.h"
00039 #include "langinternal.h"
00040 #include "langexternal.h"
00041 #include "langsystem7.h"
00042 #include "process.h"
00043 #include "tableinternal.h"
00044 #include "tablestructure.h"
00045 #include "tableverbs.h"
00046 #include "op.h"
00047 #include "oplist.h"
00048 #include "kernelverbs.h"
00049 #include "kernelverbdefs.h"
00050 #include "search.h"
00051 #include "byteorder.h"  /* 2006-04-08 aradke: endianness conversion macros */
00052 
00053 #include "langregexp.h"
00054 
00055 /*
00056     How to add the files from the Perl-Compatible Regular Expression (PCRE)
00057     library to the CodeWarrior project for Frontier on Mac OS X:
00058     
00059     1. Download the source code from http://pcre.org/ and
00060     decompress the archive file
00061     
00062     2. Run configure and then make on OS X so that all the
00063     missing .h and .c files will be created
00064     
00065     3. Copy all the following files into the PCRE directory of
00066     the Frontier source code tree, change the end-of-line
00067     terminator to cr (Mac) and make the changes
00068     listed for each file:
00069     
00070     config.h
00071     - rename file to pcre_config.h
00072     - line 45: #define NEWLINE to '\x0D' (cr) instead of '\n'
00073     
00074     internal.h
00075     - rename file to pcre_internal.h
00076     - line 40: #include pcre_config.h instead of config.h
00077     - line 227: #undef TRUE and FALSE before defining them
00078         because they are also defined in Frontier's standard headers
00079     - line 659: #define POSIX_MALLOC_THRESHOLD to 10 to make
00080         pcre.c compile
00081     
00082     pcre.h
00083     - line 163: extend the function template for pcre_exec to
00084         match the call from regexpexechandle, i.e. add an
00085         int parameter for the match length and a pointer to a
00086         const unsigned char for the character table
00087     
00088     chartables.c
00089     [no changes]
00090     
00091     get.c:
00092     - line 43: #include pcre_internal.h instead of internal.h
00093     
00094     maketables.c:
00095     - line 42: #include pcre_internal.h instead of internal.h
00096     
00097     pcre.c:
00098     - line 52: #include pcre_internal.h instead of internal.h
00099     - line 2648, 4167, 4193, 4223: add space between closing
00100         parenthesis and semicolon to avoid compiler warning
00101     - line 7193: add comment about match_length parameter
00102     - line 7201: add comments about changes
00103     - line 7213: add an int parameter for the match length
00104         and a pointer to a const unsigned char for the character
00105         table
00106     - line 7222: define a const uschar ptr named stop_match
00107     - line 7263: initialize stop_match to
00108         match_block.start_subject + start_offset + match_length
00109     - line 7283, 7284, 7359: replace reference to re->tables
00110         with name of new character parameter
00111     - line 7379, 7383, 7393, 7402, 7526: replace end_subject
00112         with stop_match
00113     
00114     study.c:
00115     - line 39: #include pcre_internal.h instead of internal.h
00116     
00117     4. Add get.c, maketables.c, pcre.c, and study.c to the
00118     CodeWarrior project
00119     
00120     5. #define PCRE_STATIC before #including pcre.h anywhere
00121 */
00122 
00123 #include "pcre_internal.h"
00124 
00125 
/*
Default character tables for the regexp verbs. The contents are selected at
compile time by MACVERSION or WIN95VERSION. Each variant consists of four
consecutive sections:

    1. a 256-byte lower casing table
    2. a 256-byte case flipping table
    3. ten 32-byte character class bit maps (320 bytes)
    4. a 256-byte table of per-character type bits

for a total of 1088 bytes.

NOTE(review): this looks like the table layout PCRE expects for its character
tables (cf. pcre_maketables) -- confirm against pcre_internal.h.

NOTE(review): if neither MACVERSION nor WIN95VERSION is defined, the
initializer list below is empty.
*/
00126 static unsigned char regexp_default_tables[] = {
00127 
00128 #ifdef MACVERSION
00129 
00130 /* This table is a lower casing table. */
00131 
00132       0,  1,  2,  3,  4,  5,  6,  7,
00133       8,  9, 10, 11, 12, 13, 14, 15,
00134      16, 17, 18, 19, 20, 21, 22, 23,
00135      24, 25, 26, 27, 28, 29, 30, 31,
00136      32, 33, 34, 35, 36, 37, 38, 39,
00137      40, 41, 42, 43, 44, 45, 46, 47,
00138      48, 49, 50, 51, 52, 53, 54, 55,
00139      56, 57, 58, 59, 60, 61, 62, 63,
00140      64, 97, 98, 99,100,101,102,103,
00141     104,105,106,107,108,109,110,111,
00142     112,113,114,115,116,117,118,119,
00143     120,121,122, 91, 92, 93, 94, 95,
00144      96, 97, 98, 99,100,101,102,103,
00145     104,105,106,107,108,109,110,111,
00146     112,113,114,115,116,117,118,119,
00147     120,121,122,123,124,125,126,127,
00148     138,140,141,142,150,154,159,135,
00149     136,137,138,139,140,141,142,143,
00150     144,145,146,147,148,149,150,151,
00151     152,153,154,155,156,157,158,159,
00152     160,161,162,163,164,165,166,167,
00153     168,169,170,171,172,173,190,191,
00154     176,177,178,179,180,181,182,183,
00155     184,185,186,187,188,189,190,191,
00156     192,193,194,195,196,197,198,199,
00157     200,201,202,136,139,155,207,207,
00158     208,209,210,211,212,213,214,215,
00159     216,216,218,219,220,221,222,223,
00160     224,225,226,227,228,137,144,135,
00161     145,143,146,148,149,147,151,153,
00162     240,152,156,158,157,245,246,247,
00163     248,249,250,251,252,253,254,255,
00164 
00165 /* This table is a case flipping table. */
00166 
00167       0,  1,  2,  3,  4,  5,  6,  7,
00168       8,  9, 10, 11, 12, 13, 14, 15,
00169      16, 17, 18, 19, 20, 21, 22, 23,
00170      24, 25, 26, 27, 28, 29, 30, 31,
00171      32, 33, 34, 35, 36, 37, 38, 39,
00172      40, 41, 42, 43, 44, 45, 46, 47,
00173      48, 49, 50, 51, 52, 53, 54, 55,
00174      56, 57, 58, 59, 60, 61, 62, 63,
00175      64, 97, 98, 99,100,101,102,103,
00176     104,105,106,107,108,109,110,111,
00177     112,113,114,115,116,117,118,119,
00178     120,121,122, 91, 92, 93, 94, 95,
00179      96, 65, 66, 67, 68, 69, 70, 71,
00180      72, 73, 74, 75, 76, 77, 78, 79,
00181      80, 81, 82, 83, 84, 85, 86, 87,
00182      88, 89, 90,123,124,125,126,127,
00183     138,140,141,142,150,154,159,231,
00184     203,229,128,204,129,130,131,233,
00185     230,232,234,237,235,236,132,238,
00186     241,239,133,205,242,244,243,134,
00187     160,161,162,163,164,165,166,167,
00188     168,169,170,171,172,173,190,191,
00189     176,177,178,179,180,181,182,183,
00190     184,185,186,187,188,189,174,175,
00191     192,193,194,195,196,197,198,199,
00192     200,201,202,136,139,155,207,206,
00193     208,209,210,211,212,213,214,215,
00194     217,216,218,219,220,221,222,223,
00195     224,225,226,227,228,137,144,135,
00196     145,143,146,148,149,147,151,153,
00197     240,152,156,158,157,245,246,247,
00198     248,249,250,251,252,253,254,255,
00199 
00200 /* This table contains bit maps for various character classes.
00201 Each map is 32 bytes long and the bits run from the least
00202 significant end of each byte. The classes that have their own
00203 maps are: space, xdigit, digit, upper, lower, word, graph,
00204 print, punct, and cntrl. Other classes are built from combinations. */
00205 
00206     0x00,0x3E,0x00,0x00,0x01,0x00,0x00,0x00,
00207     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00208     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00209     0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,
00210 
00211     0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0x03,
00212     0x7E,0x00,0x00,0x00,0x7E,0x00,0x00,0x00,
00213     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00214     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00215 
00216     0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0x03,
00217     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00218     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00219     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00220 
00221     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00222     0xFE,0xFF,0xFF,0x07,0x00,0x00,0x00,0x00,
00223     0x7F,0x00,0x00,0x00,0x00,0xC0,0x00,0x00,
00224     0x00,0x78,0x00,0x02,0xE0,0xFF,0x1E,0x00,
00225 
00226     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00227     0x00,0x00,0x00,0x00,0xFE,0xFF,0xFF,0x07,
00228     0x80,0xFF,0xFF,0xFF,0x80,0x00,0x00,0xC0,
00229     0x00,0x80,0x00,0xC1,0x00,0x00,0x00,0x00,
00230 
00231     0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0x03,
00232     0xFE,0xFF,0xFF,0x87,0xFE,0xFF,0xFF,0x07,
00233     0xFF,0xFF,0xFF,0xFF,0x80,0xC0,0x00,0xC0,
00234     0x00,0xF8,0x00,0xC3,0xE0,0xFF,0x1E,0x00,
00235 
00236     0x00,0x00,0x00,0x00,0xFE,0xFF,0xFF,0xFF,
00237     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x7F,
00238     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
00239     0xFF,0xFB,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
00240 
00241     0x00,0x3E,0x00,0x00,0xFF,0xFF,0xFF,0xFF,
00242     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x7F,
00243     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
00244     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
00245 
00246     0x00,0x00,0x00,0x00,0xFE,0xFF,0x00,0xFC,
00247     0x01,0x00,0x00,0xF8,0x01,0x00,0x00,0x78,
00248     0x00,0x00,0x00,0x00,0x7F,0x3F,0xFF,0x3F,
00249     0xFF,0x03,0xFF,0x3C,0x1F,0x00,0xE1,0xFF,
00250 
00251     0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
00252     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
00253     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00254     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00255 
00256 /* This table identifies various classes of character by individual bits:
00257   0x01   white space character
00258   0x02   letter
00259   0x04   decimal digit
00260   0x08   hexadecimal digit
00261   0x10   alphanumeric or '_'
00262   0x80   regular expression metacharacter or binary zero
00263 */
00264 
00265     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00266     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00,
00267     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00268     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00269     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00,
00270     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00,
00271     0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,
00272     0x1C,0x1C,0x00,0x00,0x00,0x00,0x00,0x80,
00273     0x00,0x1A,0x1A,0x1A,0x1A,0x1A,0x1A,0x12,
00274     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00275     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00276     0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10,
00277     0x00,0x1A,0x1A,0x1A,0x1A,0x1A,0x1A,0x12,
00278     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00279     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00280     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00,
00281     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00282     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00283     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00284     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00285     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x12,
00286     0x00,0x00,0x00,0x00,0x00,0x00,0x12,0x12,
00287     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00288     0x00,0x00,0x00,0x00,0x00,0x00,0x12,0x12,
00289     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00290     0x00,0x00,0x01,0x12,0x12,0x12,0x12,0x12,
00291     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00292     0x12,0x12,0x00,0x00,0x00,0x00,0x12,0x12,
00293     0x00,0x00,0x00,0x00,0x00,0x12,0x12,0x12,
00294     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00295     0x00,0x12,0x12,0x12,0x12,0x00,0x00,0x00,
00296     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
00297 
00298 #endif
00299 
00300 
00301 #ifdef WIN95VERSION
00302 
00303 /* This table is a lower casing table. */
00304 
00305       0,  1,  2,  3,  4,  5,  6,  7,
00306       8,  9, 10, 11, 12, 13, 14, 15,
00307      16, 17, 18, 19, 20, 21, 22, 23,
00308      24, 25, 26, 27, 28, 29, 30, 31,
00309      32, 33, 34, 35, 36, 37, 38, 39,
00310      40, 41, 42, 43, 44, 45, 46, 47,
00311      48, 49, 50, 51, 52, 53, 54, 55,
00312      56, 57, 58, 59, 60, 61, 62, 63,
00313      64, 97, 98, 99,100,101,102,103,
00314     104,105,106,107,108,109,110,111,
00315     112,113,114,115,116,117,118,119,
00316     120,121,122, 91, 92, 93, 94, 95,
00317      96, 97, 98, 99,100,101,102,103,
00318     104,105,106,107,108,109,110,111,
00319     112,113,114,115,116,117,118,119,
00320     120,121,122,123,124,125,126,127,
00321     128,129,130,131,132,133,134,135,
00322     136,137,138,139,140,141,142,143,
00323     144,145,146,147,148,149,150,151,
00324     152,153,154,155,156,157,158,159,
00325     160,161,162,163,164,165,166,167,
00326     168,169,170,171,172,173,174,175,
00327     176,177,178,179,180,181,182,183,
00328     184,185,186,187,188,189,190,191,
00329     224,225,226,227,228,229,230,231,
00330     232,233,234,235,236,237,238,239,
00331     208,241,242,243,244,245,246,215,
00332     248,249,250,251,252,221,222,223,
00333     224,225,226,227,228,229,230,231,
00334     232,233,234,235,236,237,238,239,
00335     240,241,242,243,244,245,246,247,
00336     248,249,250,251,252,253,254,255,
00337 
00338 /* This table is a case flipping table. */
/* NOTE(review): the last entry (index 255) maps to 141, while index 141 maps
to itself in both Windows casing tables, so this flip is not symmetric --
confirm that the value is intended. */
00339 
00340       0,  1,  2,  3,  4,  5,  6,  7,
00341       8,  9, 10, 11, 12, 13, 14, 15,
00342      16, 17, 18, 19, 20, 21, 22, 23,
00343      24, 25, 26, 27, 28, 29, 30, 31,
00344      32, 33, 34, 35, 36, 37, 38, 39,
00345      40, 41, 42, 43, 44, 45, 46, 47,
00346      48, 49, 50, 51, 52, 53, 54, 55,
00347      56, 57, 58, 59, 60, 61, 62, 63,
00348      64, 97, 98, 99,100,101,102,103,
00349     104,105,106,107,108,109,110,111,
00350     112,113,114,115,116,117,118,119,
00351     120,121,122, 91, 92, 93, 94, 95,
00352      96, 65, 66, 67, 68, 69, 70, 71,
00353      72, 73, 74, 75, 76, 77, 78, 79,
00354      80, 81, 82, 83, 84, 85, 86, 87,
00355      88, 89, 90,123,124,125,126,127,
00356     128,129,130,131,132,133,134,135,
00357     136,137,138,139,140,141,142,143,
00358     144,145,146,147,148,149,150,151,
00359     152,153,154,155,156,157,158,159,
00360     160,161,162,163,164,165,166,167,
00361     168,169,170,171,172,173,174,175,
00362     176,177,178,179,180,181,182,183,
00363     184,185,186,187,188,189,190,191,
00364     224,225,226,227,228,229,230,231,
00365     232,233,234,235,236,237,238,239,
00366     208,241,242,243,244,245,246,215,
00367     248,249,250,251,252,221,222,223,
00368     192,193,194,195,196,197,198,199,
00369     200,201,202,203,204,205,206,207,
00370     240,209,210,211,212,213,214,247,
00371     216,217,218,219,220,253,254,141,
00372 
00373 /* This table contains bit maps for various character classes.
00374 Each map is 32 bytes long and the bits run from the least
00375 significant end of each byte. The classes that have their own
00376 maps are: space, xdigit, digit, upper, lower, word, graph,
00377 print, punct, and cntrl. Other classes are built from combinations. */
00378 
00379     0x00,0x3E,0x00,0x00,0x01,0x00,0x00,0x00,
00380     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00381     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00382     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00383 
00384     0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0x03,
00385     0x7E,0x00,0x00,0x00,0x7E,0x00,0x00,0x00,
00386     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00387     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00388 
00389     0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0x03,
00390     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00391     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00392     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00393 
00394     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00395     0xFE,0xFF,0xFF,0x07,0x00,0x00,0x00,0x00,
00396     0x00,0x14,0x00,0x80,0x00,0x00,0x00,0x00,
00397     0xFF,0xFF,0x7F,0x7F,0x00,0x00,0x00,0x00,
00398 
00399     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00400     0x00,0x00,0x00,0x00,0xFE,0xFF,0xFF,0x07,
00401     0x08,0x00,0x00,0x14,0x00,0x00,0x00,0x00,
00402     0x00,0x00,0x00,0x80,0xFF,0xFF,0x7F,0xFF,
00403 
00404     0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0x03,
00405     0xFE,0xFF,0xFF,0x87,0xFE,0xFF,0xFF,0x07,
00406     0x08,0x14,0x00,0x94,0x00,0x00,0x00,0x00,
00407     0xFF,0xFF,0x7F,0xFF,0xFF,0xFF,0x7F,0xFF,
00408 
00409     0x00,0x00,0x00,0x00,0xFE,0xFF,0xFF,0xFF,
00410     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x7F,
00411     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
00412     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
00413 
00414     0x00,0x3E,0x00,0x00,0xFF,0xFF,0xFF,0xFF,
00415     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x7F,
00416     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
00417     0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
00418 
00419     0x00,0x00,0x00,0x00,0xFE,0xFF,0x00,0xFC,
00420     0x01,0x00,0x00,0xF8,0x01,0x00,0x00,0x78,
00421     0xF7,0xEB,0xFF,0x6B,0xFF,0xFF,0xFF,0xFF,
00422     0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,
00423 
00424     0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
00425     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
00426     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00427     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00428 
00429 /* This table identifies various classes of character by individual bits:
00430   0x01   white space character
00431   0x02   letter
00432   0x04   decimal digit
00433   0x08   hexadecimal digit
00434   0x10   alphanumeric or '_'
00435   0x80   regular expression metacharacter or binary zero
00436 */
00437 
00438     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00439     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00,
00440     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00441     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00442     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00,
00443     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00,
00444     0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,
00445     0x1C,0x1C,0x00,0x00,0x00,0x00,0x00,0x80,
00446     0x00,0x1A,0x1A,0x1A,0x1A,0x1A,0x1A,0x12,
00447     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00448     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00449     0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10,
00450     0x00,0x1A,0x1A,0x1A,0x1A,0x1A,0x1A,0x12,
00451     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00452     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00453     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00,
00454     0x00,0x00,0x00,0x12,0x00,0x00,0x00,0x00,
00455     0x00,0x00,0x12,0x00,0x12,0x00,0x00,0x00,
00456     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00457     0x00,0x00,0x12,0x00,0x12,0x00,0x00,0x12,
00458     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00459     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00460     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00461     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00462     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00463     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00464     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x00,
00465     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00466     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00467     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00468     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x00,
00469     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12
00470 
00471 #endif
00472 
00473     };
00474 
/*
Pascal string constants used by the regexp verbs. Each one starts with a
length byte (written as a \xNN escape) that equals the number of characters
following it. The length byte is a separate string literal so that the hex
escape cannot absorb a following hex-digit character such as the leading
'f' of "flCaseSensitive".
*/
00475 /* Pascal strings for regexp.compile */
00476 
00477 #define STR_flCaseSensitive         BIGSTRING ("\x0f" "flCaseSensitive")
00478 #define STR_flDotMatchesAll         BIGSTRING ("\x0f" "flDotMatchesAll")
00479 #define STR_flMultiLine             BIGSTRING ("\x0b" "flMultiLine")
00480 #define STR_flAutoCapture           BIGSTRING ("\x0d" "flAutoCapture")
00481 #define STR_flGreedyQuantifiers     BIGSTRING ("\x13" "flGreedyQuantifiers")
00482 #define STR_flMatchEmptyString      BIGSTRING ("\x12" "flMatchEmptyString")
00483 #define STR_flExtendedMode          BIGSTRING ("\x0e" "flExtendedMode")
00484 
00485 /* Pascal strings for regexp.match */
00486 
00487 #define STR_ix                      BIGSTRING ("\x02" "ix")
00488 #define STR_ct                      BIGSTRING ("\x02" "ct")
00489 #define STR_adrMatchInfoTable       BIGSTRING ("\x11" "adrMatchInfoTable")
00490 #define STR_flMakeGroups            BIGSTRING ("\x0c" "flMakeGroups")
00491 #define STR_flMakeNamedGroups       BIGSTRING ("\x11" "flMakeNamedGroups")
00492 
00493 /* Pascal strings for the match info table */
00494 
00495 #define STR_matchOffset             BIGSTRING ("\x0b" "matchOffset")
00496 #define STR_matchLength             BIGSTRING ("\x0b" "matchLength")
00497 #define STR_matchString             BIGSTRING ("\x0b" "matchString")
00498 #define STR_groupOffsets            BIGSTRING ("\x0c" "groupOffsets")
00499 #define STR_groupLengths            BIGSTRING ("\x0c" "groupLengths")
00500 #define STR_groupStrings            BIGSTRING ("\x0c" "groupStrings")
00501 #define STR_namedGroups             BIGSTRING ("\x0b" "namedGroups")
00502 #define STR_groupNumber             BIGSTRING ("\x0b" "groupNumber")
00503 
00504 /* Pascal strings for regexp.replace */
00505 
00506 #define STR_maxReplacements         BIGSTRING ("\x0f" "maxReplacements")
00507 #define STR_adrReplacementCount     BIGSTRING ("\x13" "adrReplacementCount")
00508 #define STR_adrCallback             BIGSTRING ("\x0b" "adrCallback")
00509 #define STR_matchInfo               BIGSTRING ("\x09" "matchInfo")
00510 #define STR_replacementString       BIGSTRING ("\x11" "replacementString")
00511 
00512 /* Pascal strings for regexp.extract */
00513 
00514 #define STR_groups                  BIGSTRING ("\x06" "groups")
00515 
00516 /* Pascal strings for regexp.split */
00517 
00518 #define STR_maxSplits               BIGSTRING ("\x09" "maxSplits")
00519 
00520 /* Pascal strings for regexp.visit */
00521 
00522 #define STR_maxRuns                 BIGSTRING ("\x07" "maxRuns")
00523 #define STR_matchInfoTable          BIGSTRING ("\x0e" "matchInfoTable")
00524 
00525 /* Pascal strings for regexp.grep */
00526 
00527 #define STR_flIncludeMatches        BIGSTRING ("\x10" "flIncludeMatches")
00528 
00529 /* Pascal strings for regexp.getPatternInfo */
00530 
00531 #define STR_backRefMax              BIGSTRING ("\x0a" "backRefMax")
00532 #define STR_captureCount            BIGSTRING ("\x0c" "captureCount")
00533 #define STR_firstByte               BIGSTRING ("\x09" "firstByte")
00534 #define STR_firstByteTable          BIGSTRING ("\x0e" "firstByteTable")
00535 #define STR_lastLiteral             BIGSTRING ("\x0b" "lastLiteral")
00536 #define STR_nameTable               BIGSTRING ("\x09" "nameTable")
00537 #define STR_options                 BIGSTRING ("\x07" "options")
00538 #define STR_size                    BIGSTRING ("\x04" "size")
00539 #define STR_studySize               BIGSTRING ("\x09" "studySize")
00540 
00541 /* Identifiers for use in packed patterns: SYSTEM_ID resolves to the id of
the platform this file is compiled for (0 = Mac, 1 = Win). It is left
undefined when neither MACVERSION nor WIN95VERSION is defined. */
00542 
00543 #define SYSTEM_ID_MAC   0
00544 #define SYSTEM_ID_WIN   1
00545 
00546 #ifdef MACVERSION
00547 #define SYSTEM_ID   SYSTEM_ID_MAC
00548 #endif
00549 
00550 #ifdef WIN95VERSION
00551 #define SYSTEM_ID   SYSTEM_ID_WIN
00552 #endif
00553 
/* NOTE(review): presumably the value stored in tycompiledpattern.version --
the code that writes it is outside this view; confirm. */
00554 #define CURRENT_VERSION     0
00555 
00556 
00557 /* typedefs */
00558 
/*
Tokens for the verbs that are processed by langregexp.c.

The values are assigned implicitly in declaration order, starting at zero.
ctregexpverbs is declared last and therefore equals the number of verb
tokens that precede it.
*/
typedef enum tyregexptoken {
    compilefunc,            /*first token, value 0*/
    matchfunc,
    replacefunc,
    extractfunc,
    splitfunc,
    joinfunc,
    visitfunc,
    grepfunc,
    getpatterninfofunc,
    expandfunc,
    ctregexpverbs           /*count of the tokens above; keep it last*/
} tyregexptoken;
00584 
00585 
/*
Identifies the kind of each part of a replacement string. Values are
assigned implicitly in declaration order, so REGEXP_TERMINATOR is zero.
*/
typedef enum tyreplacementtoken {
    REGEXP_TERMINATOR,      /*signals end of sequence of replacement parts*/
    REGEXP_LITERAL,         /*literal string*/
    REGEXP_NUMBERED,        /*numbered reference to a captured substring*/
    REGEXP_NAMED            /*named reference to a captured substring*/
} tyreplacementtoken;
00597 
00598 
00599 typedef struct tycompiledpattern { /*internal representation of a compiled pattern*/
00600 
00601     OSType type;            /*type of binary object, required to be 'PCRE'*/
00602     
00603     short systemid;         /* 0 = Mac, 1 = Win */
00604     
00605     short version;          /*version of this record, increment to have older versions
00606                                 of Frontier recognize newer patterns as invalid*/
00607     
00608     short pcreversionmajor;     /*PCRE version major*/
00609     
00610     short pcreversionminor;     /*PCRE version minor*/
00611     
00612     short unused[4];        /*reserved for future use*/
00613     
00614     int options;            /*options supplied to regexpcompileverb*/
00615     
00616     int capturecount;       /*number of captured substrings, determines size of ovector passed into pcre_exec*/
00617     
00618     int ovecsize;           /*number of slots in the ovector*/
00619     
00620     int pattern_offset;     /*offset of pattern structure in this record*/
00621     
00622     int pattern_size;       /*size allocated for pattern structure*/
00623     
00624     int study_offset;       /*offset of study_data structure in this record*/
00625     
00626     int study_size;         /*size allocated for study_data structure*/
00627     
00628     int unused2[4];         /*reserved for future use*/
00629     } tycompiledpattern;
00630 
00631 
00632 typedef struct tyreplacepart { /*describes parts of a replacement string*/
00633 
00634     tyreplacementtoken type;    /*type of part*/
00635     
00636     int ix;                     /*index into replacement string*/
00637     
00638     int len;                    /*length in replacement string*/
00639     
00640     int ref;                    /*numbered substring*/
00641     
00642     } tyreplacepart, *ptrreplacepart, **hdlreplacepart;
00643 
00644 
00645 typedef struct tyreplscancompileinfo {
00646     
00647     handlestream s;
00648     
00649     Handle hcp;
00650     
00651     } tyreplscancompileinfo;
00652 
00653 
00654 typedef struct tyreplscanexpandinfo {
00655     
00656     handlestream s;
00657     
00658     Handle hsubject;
00659     
00660     hdllistrecord hgroups;
00661     
00662     hdlhashtable htnames;
00663     
00664     } tyreplscanexpandinfo;
00665 
00666 
00667 typedef struct tyregexpsearchinfo {
00668     
00669     handlestream s;
00670     
00671     char *p;
00672     
00673     Handle hcp;
00674     
00675     Handle hovector;
00676     
00677     } tyregexpsearchinfo;
00678 
00679 
00680 typedef struct tywritelistinfo {
00681 
00682     handlestream *s;
00683     
00684     Handle hglue;
00685     } tywritelistinfo;
00686 
00687 
00688 typedef struct tygreplistinfo {
00689 
00690     Handle hcp;
00691     
00692     Handle hovec;
00693 
00694     hdllistrecord hresult;
00695     
00696     boolean flincludematches;
00697     
00698     ptrstring bserrorptr;
00699     } tygreplistinfo;
00700 
00701 
00702 typedef boolean (*tyreplscanliteralcallback) (int ix, int len, bigstring bserror, void *refcon);
00703 
00704 typedef boolean (*tyreplscannumberedcallback) (int ix, int len, int ref, bigstring bserror, void *refcon);
00705 
00706 typedef boolean (*tyreplscannamedcallback) (int ix, int len, const char *cptr, int clen, bigstring bserror, void *refcon);
00707 
00708 typedef void (*tyreplscanbadgroupnamecallback) (int pos, bigstring bsmsg);
00709 
00710 
/* File-scope pointer to a character table, initially nil.
NOTE(review): presumably set elsewhere in this file to the table handed to
PCRE -- the code that assigns and reads it is outside this view; confirm. */
00711 static unsigned char *chartableptr = nil;
00712 
00713 
00714 #ifdef MACVERSION
00715 #pragma mark === lang errors ===
00716 #endif
00717 
00718 
00719 static void regexpcompilationerror (const char *errmsg, int errpos, bigstring bserror) {
00720     
00721     bigstring bserrmsg;
00722     bigstring bserrpos;
00723     
00724     copyctopstring (errmsg, bserrmsg);
00725     
00726     numbertostring (errpos, bserrpos);
00727     
00728     getstringlist (langerrorlist, regexpcompileerror, bserror);
00729     
00730     parsedialogstring (bserror, bserrmsg, bserrpos, nil, nil, bserror);
00731 
00732     return;
00733     } /*regexpcompilationerror*/
00734 
00735 
00736 static void regexpverberrorwithnumber (short stringnum, int x, bigstring bserror) {
00737 
00738     bigstring bsnum;
00739     
00740     getstringlist (langerrorlist, stringnum, bserror);
00741     
00742     numbertostring (x, bsnum);
00743     
00744     parsedialogstring (bserror, BIGSTRING ("\x02" "^0"), bsnum, nil, nil, bserror);
00745 
00746     return;
00747     } /*regexpverberrorwithnumber*/
00748 
00749 
00750 static void regexperrorwithnumber (short stringnum, int x, bigstring bserror) {
00751 
00752     bigstring bsnum;
00753     
00754     getstringlist (langerrorlist, stringnum, bserror);
00755     
00756     numbertostring (x, bsnum);
00757     
00758     parsedialogstring (bserror, bsnum, nil, nil, nil, bserror);
00759 
00760     return;
00761     } /*regexperrorwithnumber*/
00762 
00763 
00764 #ifdef MACVERSION
00765 #pragma mark === pattern value accessors ===
00766 #endif
00767 
00768 
00769 static int getoptions (Handle hcp) {
00770     return ((**((tycompiledpattern**)hcp)).options);
00771     } /*getoptions*/
00772 
00773 
00774 static int getcapturecount (Handle hcp) {
00775     return ((**((tycompiledpattern**)hcp)).capturecount);
00776     } /*getcapturecount*/
00777 
00778 
00779 static int getovectorsize (Handle hcp) {
00780     return ((**((tycompiledpattern**)hcp)).ovecsize);
00781     } /*getovectorsize*/
00782 
00783 
00784 static int getgroupoffset (Handle hovec, int ix) {
00785     return (*((int*)(*hovec) + 2 * ix));
00786     } /*getgroupoffset*/
00787 
00788 
00789 static int getgroupend (Handle hovec, int ix) {
00790     return (*((int*)(*hovec) + 2 * ix + 1));
00791     } /*getgroupend*/
00792 
00793 
00794 static int getgrouplength (Handle hovec, int ix) {
00795     return (getgroupend (hovec, ix) - getgroupoffset (hovec, ix));
00796     } /*getgrouplength*/
00797 
00798 
00799 static pcre * getpatternref (Handle hcp) {
00800     
00801     /*
00802     Compute a ptr to the pcre part of the handle that is only valid
00803     until the memory manager relocates our block of memory.
00804     */
00805     
00806     return ((pcre *) ((char *) *hcp + (**((tycompiledpattern**)hcp)).pattern_offset));
00807     } /*getpatternref*/
00808 
00809 
00810 static real_pcre * getrealpatternref (Handle hcp) {
00811     
00812     /*
00813     Compute a ptr to the pcre part of the handle that is only valid
00814     until the memory manager relocates our block of memory.
00815     */
00816     
00817     return ((real_pcre *) ((char *) *hcp + (**((tycompiledpattern**)hcp)).pattern_offset));
00818     } /*getrealpatternref*/
00819 
00820 
00821 static void * getstudydata (Handle hcp) {
00822     
00823     /*
00824     Compute a ptr to the study_data part of the handle that is only valid
00825     until the memory manager relocates our block of memory.
00826     */
00827     
00828     return ((void *) (((**((tycompiledpattern**)hcp)).study_size != nil) ? ((char *) *hcp + ((**((tycompiledpattern**)hcp)).study_offset)) : nil));
00829     } /*getstudydata*/
00830 
00831 
00832 static void initpcreextra (Handle hcp, pcre_extra *extraref) {
00833     
00834     void *study_data = getstudydata (hcp);
00835     
00836     clearbytes (extraref, sizeof (pcre_extra));
00837     
00838     if (study_data != nil) {
00839         extraref->flags |= PCRE_EXTRA_STUDY_DATA;
00840         extraref->study_data = study_data;
00841         }
00842     } /*initpcreextra*/
00843 
00844 
00845 #ifdef MACVERSION
00846 #pragma mark === param handling ===
00847 #endif
00848 
00849 
00850 static boolean getoptionallistparam (hdltreenode hfirst, short *ctconsumed, short *ctpositional, bigstring bsparam, hdllistrecord *hlist) {
00851     
00852     /*
00853     Get an optional parameter of type list that defaults to an empty list.
00854     */
00855     
00856     tyvaluerecord vparam;
00857     
00858     initvalue (&vparam, listvaluetype);
00859 
00860     if (!getoptionalparamvalue (hfirst, ctconsumed, ctpositional, bsparam, &vparam))
00861         return (false);
00862     
00863     if (vparam.data.binaryvalue == nil) {
00864         
00865         if (!opnewlist (hlist, false))
00866             return (false);
00867         
00868         if (!setheapvalue ((Handle) *hlist, listvaluetype, &vparam))
00869             return (false);
00870         }
00871 
00872     *hlist = (hdllistrecord) vparam.data.binaryvalue;
00873 
00874     return (true);
00875     } /*getoptionallistparam*/
00876 
00877 
00878 #ifdef MACVERSION
00879 #pragma mark === utils ===
00880 #endif
00881 
00882 
00883 static boolean regexpgetpatternvalue (hdltreenode hp1, short pnum, boolean flreadonly, bigstring bserror, Handle *h, Handle *hovec) {
00884 
00885     /*
00886     Get a compiled pattern from the code tree for the params.
00887     
00888     If we don't yield control to other threads, it's safe
00889     to request a read-only copy to reduce memory overhead.
00890     However, if we run a callback script for example, it
00891     just might destroy the original pattern value we are
00892     looking at and cause us to crash. For those kernel verbs,
00893     we make our own copy of the pattern value. (Currently,
00894     these verbs are regexp.replace and regexp.visit.)
00895     
00896     Since the usual case is to request a readonly value,
00897     we should never modify any part of the pattern value.
00898     */
00899     
00900     tyvaluerecord val;
00901     Handle hcp;
00902     boolean flvalid;    
00903     
00904     if (!getreadonlyparamvalue (hp1, pnum, &val))
00905         return (false);
00906     
00907     if ((!flreadonly) || val.valuetype != binaryvaluetype) {
00908         
00909         if (!copyvaluerecord (val, &val))
00910             return (false);
00911         
00912         if (!coercetobinary (&val))
00913             return (false);
00914         }
00915     
00916     hcp = val.data.binaryvalue;
00917     
00918     /* consistency checks */
00919     
00920     flvalid = gethandlesize (hcp) >= sizeof (tycompiledpattern);
00921     
00922     flvalid = flvalid && conditionallongswap ((**((tycompiledpattern**)hcp)).type) == MAGIC_NUMBER;
00923         
00924     flvalid = flvalid && conditionalshortswap ((**((tycompiledpattern**)hcp)).systemid) == SYSTEM_ID;
00925         
00926     flvalid = flvalid && conditionalshortswap ((**((tycompiledpattern**)hcp)).version) == CURRENT_VERSION;
00927         
00928     flvalid = flvalid && conditionalshortswap ((**((tycompiledpattern**)hcp)).pcreversionmajor) == PCRE_MAJOR;
00929         
00930     flvalid = flvalid && conditionalshortswap ((**((tycompiledpattern**)hcp)).pcreversionminor) == PCRE_MINOR;
00931         
00932     flvalid = flvalid && (**((tycompiledpattern**)hcp)).pattern_offset == sizeof (tycompiledpattern);
00933         
00934     flvalid = flvalid && ((**((tycompiledpattern**)hcp)).study_offset == 0
00935                             || (**((tycompiledpattern**)hcp)).study_offset == (sizeof (tycompiledpattern) + (**((tycompiledpattern**)hcp)).pattern_size));
00936     
00937     flvalid = flvalid && gethandlesize (hcp) == (sizeof (tycompiledpattern) + (**((tycompiledpattern**)hcp)).pattern_size + (**((tycompiledpattern**)hcp)).study_size);
00938         
00939     flvalid = flvalid && (**((tycompiledpattern**)hcp)).pattern_size == getrealpatternref (hcp) -> size;
00940         
00941     flvalid = flvalid && ((**((tycompiledpattern**)hcp)).study_size == 0
00942                             || ((**((tycompiledpattern**)hcp)).study_offset > 0
00943                                     && (**((tycompiledpattern**)hcp)).study_size == ((pcre_study_data *) getstudydata (hcp)) -> size));
00944     
00945     if (!flvalid) {
00946         getstringlist (langerrorlist, regexpinvaliderror, bserror);
00947         return (false);
00948         }
00949     
00950     if (hovec != nil) {
00951         
00952         if (!regexpnewovector (hcp, hovec))
00953             return (false);
00954     
00955         if (!pushtmpstack (*hovec))
00956             return (false);
00957         }
00958     
00959     /* success! */
00960     
00961     *h = hcp;
00962     
00963     return (true);
00964     } /*regexpgetpatternvalue*/
00965 
00966 
00967 static int regexpexec (Handle hcp, char *subject, int length,
00968                         int ix, int matchlen, Handle hovector,
00969                         bigstring bserror) {
00970 
00971     pcre_extra extra;
00972     int res;
00973     
00974     /* consistency checks */
00975     
00976     if (ix < 0)
00977         ix = 0;
00978     
00979     if (matchlen > length - ix)
00980         matchlen = length - ix;
00981     
00982     if (matchlen < 0)
00983         matchlen = 0;
00984     
00985     /* run pcre engine */
00986     
00987     initpcreextra (hcp, &extra);
00988 
00989     res = pcre_exec (getpatternref (hcp), &extra,
00990                         subject, length, ix, matchlen,
00991                         getoptions (hcp) & PUBLIC_EXEC_OPTIONS,
00992                         (int *)*hovector, getovectorsize (hcp), chartableptr); 
00993 
00994     /* process execution errors here, presumably indicative of a bug in the PCRE library */
00995     
00996     if (res <= 0 && res != PCRE_ERROR_NOMATCH) {
00997         
00998         // "Can't do regexp.foo because an internal regexp error occurred (code X)."
00999     
01000         regexpverberrorwithnumber (regexpinternalerror, res, bserror);
01001         }
01002 
01003     return (res);
01004     } /*regexpexec*/
01005 
01006 
01007 static int regexpexechandle (Handle hcp, Handle h,
01008                                 int ix, int matchlen, Handle hovector,
01009                                 bigstring bserror) {
01010 
01011     return (regexpexec (hcp, *h, gethandlesize (h), ix, matchlen, hovector, bserror));
01012     } /*regexpexechandle*/
01013 
01014 
01015 static boolean regexpgetpatterninfo (Handle hcp, int what, bigstring bserror, void *where) {
01016 
01017     pcre_extra extra;
01018     int res;
01019     
01020     initpcreextra (hcp, &extra);
01021     
01022     res = pcre_fullinfo (getpatternref (hcp), &extra, what, where);
01023     
01024     if (res != 0) {
01025         regexpverberrorwithnumber (regexpinternalerror, res, bserror);
01026         return (false);
01027         }
01028     
01029     return (true);
01030     } /*regexpgetpatterninfo*/
01031 
01032 
01033 static boolean regexpbuildmatchinfotable (Handle hsubject, Handle hcp, Handle hovec,
01034                                             boolean flmakegroups, boolean flmakenamedgroups,
01035                                             bigstring bserror, hdlhashtable ht) {
01036 
01037     long offset = getgroupoffset (hovec, 0);
01038     long length = getgrouplength (hovec, 0);
01039     hdllistrecord hoffsets = nil;
01040     hdllistrecord hlengths = nil;
01041     hdllistrecord hstrings = nil;
01042     Handle h;
01043     
01044     /* matchOffset */
01045     
01046     if (!langassignlongvalue (ht, STR_matchOffset, offset + 1))
01047         goto exit;
01048     
01049     /* matchLength */
01050     
01051     if (!langassignlongvalue (ht, STR_matchLength, length))
01052         goto exit;
01053     
01054     /* matchString */
01055 
01056     if (!loadfromhandletohandle (hsubject, &offset, length, false, &h))
01057         goto exit;
01058     
01059     if (!langassigntextvalue (ht, STR_matchString, h)) {
01060         disposehandle (h);
01061         goto exit;
01062         }
01063     
01064     /* make groups */
01065     
01066     if (flmakegroups) {
01067     
01068         tyvaluerecord vtemp;
01069         long j = 0;
01070         
01071         if (!opnewlist (&hoffsets, false))
01072             goto exit;
01073         
01074         if (!opnewlist (&hlengths, false))
01075             goto exit;
01076         
01077         if (!opnewlist (&hstrings, false))
01078             goto exit;
01079         
01080         while ((++j) <= getcapturecount (hcp)) {
01081             
01082             offset = getgroupoffset (hovec, j);
01083             
01084             if (offset >= 0) { /* this group matched */
01085                 
01086                 length = getgrouplength (hovec, j);
01087             
01088                 /* groupOffsets [j] */
01089                 
01090                 if (!langpushlistlong (hoffsets, offset + 1))
01091                     goto exit;
01092             
01093                 /* groupLengths [j] */
01094                 
01095                 if (!langpushlistlong (hlengths, length))
01096                     goto exit;
01097             
01098                 /* groupStrings [j] */
01099                 
01100                 if (!loadfromhandletohandle (hsubject, &offset, length, false, &h))
01101                     goto exit;
01102                 
01103                 if (!langpushlisttext (hstrings, h))
01104                     goto exit;
01105                 }
01106             else { /* this group didn't match */
01107             
01108                 /* groupOffsets [j] */
01109                 
01110                 if (!langpushlistlong (hoffsets, 0))
01111                     goto exit;
01112             
01113                 /* groupLengths [j] */
01114                 
01115                 if (!langpushlistlong (hlengths, 0))
01116                     goto exit;
01117             
01118                 /* groupStrings [j] */
01119                 
01120                 if (!langpushliststring (hstrings, emptystring))
01121                     goto exit;
01122                 
01123                 }
01124             }
01125         
01126         /* groupOffsets */
01127         
01128         initvalue (&vtemp, listvaluetype);
01129         
01130         vtemp.data.binaryvalue = (Handle) hoffsets;
01131         
01132         if (!hashtableassign (ht, STR_groupOffsets, vtemp))
01133             goto exit;
01134         
01135         hoffsets = nil;
01136         
01137         /* groupLengths */
01138         
01139         initvalue (&vtemp, listvaluetype);
01140         
01141         vtemp.data.binaryvalue = (Handle) hlengths;
01142         
01143         if (!hashtableassign (ht, STR_groupLengths, vtemp))
01144             goto exit;
01145         
01146         hlengths = nil;
01147         
01148         /* groupStrings */
01149         
01150         initvalue (&vtemp, listvaluetype);
01151         
01152         vtemp.data.binaryvalue = (Handle) hstrings;
01153         
01154         if (!hashtableassign (ht, STR_groupStrings, vtemp))
01155             goto exit;
01156         
01157         hstrings = nil;
01158         }
01159     
01160     if (flmakenamedgroups) {
01161 
01162         hdlhashtable htgroups, htname;
01163         int res, ct, sz, k, groupnum;
01164         unsigned char *cptr;
01165         bigstring bs;
01166         
01167         res = pcre_fullinfo (getpatternref (hcp), nil, PCRE_INFO_NAMECOUNT, (void *) &ct);
01168         
01169         if (res == 0)
01170             res = pcre_fullinfo (getpatternref (hcp), nil, PCRE_INFO_NAMEENTRYSIZE, (void *) &sz);
01171         
01172         if (res == 0)
01173             res = pcre_fullinfo (getpatternref (hcp), nil, PCRE_INFO_NAMETABLE, (void *) &cptr);
01174         
01175         if (res != 0) {
01176             regexpverberrorwithnumber (regexpinternalerror, res, bserror);
01177             goto exit;
01178             }
01179         
01180         if (!langassignnewtablevalue (ht, STR_namedGroups, &htgroups))
01181             goto exit;
01182 
01183         if (cptr != nil) {
01184             
01185             for (k = 0; k < ct; k++) {
01186                 
01187                 groupnum = (cptr[k*sz] << 8) + cptr[k*sz+1];
01188                 
01189                 copyctopstring ((char *) (cptr + k * sz + 2), bs);
01190                 
01191                 if (!langassignnewtablevalue (htgroups, bs, &htname))
01192                     goto exit;
01193                 
01194                 offset = getgroupoffset (hovec, groupnum);
01195                 
01196                 length = getgrouplength (hovec, groupnum);
01197 
01198                 if (!langassignlongvalue (htname, STR_groupNumber, groupnum))
01199                     goto exit;
01200 
01201                 if (!langassignlongvalue (htname, STR_matchOffset, offset + 1)) /*1-based*/
01202                     goto exit;
01203 
01204                 if (!langassignlongvalue (htname, STR_matchLength, length))
01205                     goto exit;
01206 
01207                 if (!loadfromhandletohandle (hsubject, &offset, length, false, &h))
01208                     goto exit;
01209                 
01210                 if (!langassigntextvalue (htname, STR_matchString, h)) {
01211                     disposehandle (h);
01212                     goto exit;
01213                     }
01214                 } /*for*/
01215             }
01216         }
01217         
01218     return (true);
01219 
01220 exit:
01221     
01222     opdisposelist (hoffsets);
01223     
01224     opdisposelist (hlengths);
01225     
01226     opdisposelist (hstrings);
01227     
01228     return (false);
01229     } /*regexpbuildmatchinfotable*/
01230 
01231 
01232 static boolean writehandlestreamreplpart (handlestream *s, tyreplacementtoken type, int ix, int len, int ref) {
01233     
01234     /*
01235     helper function for regexpscanreplacement
01236     */
01237 
01238     tyreplacepart rp;
01239     
01240     if (type == REGEXP_LITERAL && len == 0) /*never write empty literals*/
01241         return (true);
01242     
01243     rp.type = type;
01244     rp.ix   = ix;
01245     rp.len  = len;
01246     rp.ref  = ref;
01247     
01248     return (writehandlestream (s, &rp, sizeof (rp)));
01249     } /*writehandlestreamreplpart*/
01250 
01251 
01252 static int regexpstringnumberfrompattern (const char *cptr, int len, Handle hcp) {
01253     
01254     /*
01255     2006-02-08 aradke: rewrote conditional logic in loop body. the previous version
01256             may have yielded incorrect results if one of the two strings that are compared
01257             in the loop is the leading substring of the other, e.g. "spam" and "spamalot".
01258     */
01259     
01260     int res, top, mid, bot, entrysize, elen, c;
01261     unsigned char *entry, *nametable;
01262     
01263     res = pcre_fullinfo (getpatternref (hcp), nil, PCRE_INFO_NAMECOUNT,  (void *) &top);    /* number of entries in table */
01264 
01265     if (res == 0)
01266         res = pcre_fullinfo (getpatternref (hcp), nil, PCRE_INFO_NAMEENTRYSIZE, (void *) &entrysize);   /* length of longest entry in table */
01267 
01268     if (res == 0)
01269         res = pcre_fullinfo (getpatternref (hcp), nil, PCRE_INFO_NAMETABLE, (void *) &nametable);   
01270     
01271     if (res != 0) {
01272         return (res);
01273         }
01274         
01275     bot = 0;
01276     
01277     while (top > bot) {
01278     
01279         mid = (top + bot) / 2;
01280       
01281         entry = nametable + entrysize * mid;
01282 
01283         elen = strlen ((char *)(entry + 2));
01284 
01285         c = memcmp (cptr, (char *)(entry + 2), min(len, elen));
01286 
01287         if (c < 0)  /* 2006-02-08 aradke */
01288             top = mid;
01289         else if (c > 0)
01290             bot = mid + 1;
01291         else if (elen > len)    /* implicitely c == 0 from here on... */
01292             top = mid;  /* shorter entries are lower in table */
01293         else if (elen < len)
01294             bot = mid + 1;  /* longer entries are higher in table */
01295         else
01296             return (entry[0] << 8) + entry[1];  /* implicitly c == 0 and elen == len, i.e. named pattern found */
01297       }
01298 
01299     return (PCRE_ERROR_NOSUBSTRING);
01300     } /*regexpstringnumberfrompattern*/
01301 
01302 
01303 static boolean replscancompileliteral (int ix, int len, bigstring bserror, void *refcon) {
01304     
01305     tyreplscancompileinfo *info = (tyreplscancompileinfo *) refcon;
01306     
01307     return (writehandlestreamreplpart (&(info->s), REGEXP_LITERAL, ix, len, 0));
01308     } /*replscancompileliteral*/
01309 
01310 
01311 static boolean replscancompilenumbered (int ix, int len, int ref, bigstring bserror, void *refcon) {
01312     
01313     tyreplscancompileinfo *info = (tyreplscancompileinfo *) refcon;
01314                         
01315     if (ref > getcapturecount (info->hcp)) {
01316         regexpverberrorwithnumber (regexpnonexistantgroupnumbererror, ix+1, bserror);
01317         return (false);
01318         }
01319     
01320     return (writehandlestreamreplpart (&(info->s), REGEXP_NUMBERED, ix, len, ref));
01321     } /*replscancompilenumbered*/
01322 
01323 
01324 static boolean replscancompilenamed (int ix, int len, const char *cptr, int clen, bigstring bserror, void *refcon) {
01325     
01326     /*
01327     2006-02-18 aradke: actually write the group index ref instead of just nil.
01328         fixes bug in re.replace verb where any matched named groups would consistently
01329         be replaced with the complete match string instead of just the matched group.
01330     */
01331     
01332     tyreplscancompileinfo *info = (tyreplscancompileinfo *) refcon;
01333     int ref;
01334     
01335     ref = regexpstringnumberfrompattern (cptr, clen, info->hcp);
01336                         
01337     if (ref <= 0) {
01338         if (ref == PCRE_ERROR_NOSUBSTRING)
01339             regexpverberrorwithnumber (regexpnonexistantgroupnameerror, ix+1, bserror);
01340         else
01341             regexpverberrorwithnumber (regexpinternalerror, ref, bserror);
01342         return (false);
01343         }
01344     
01345     return (writehandlestreamreplpart (&(info->s), REGEXP_NAMED, ix, len, ref));
01346     } /*replscancompilenamed*/
01347 
01348     
01349 static boolean replscanwriteliteral (int ix, int len, bigstring bserror, void *refcon) {
01350 #pragma unused (bserror)
01351 
01352     tyreplscanexpandinfo *info = (tyreplscanexpandinfo *) refcon;
01353     
01354     return (writehandlestreamhandlepart (&(info->s), info->hsubject, ix, len));
01355     } /*replscanwriteliteral*/
01356 
01357 
01358 static boolean replscanwritenumbered (int ix, int len, int ref, bigstring bserror, void *refcon) {
01359 #pragma unused (len)
01360 
01361     tyreplscanexpandinfo *info = (tyreplscanexpandinfo *) refcon;
01362     tyvaluerecord val;
01363     
01364     if (ref > opcountlistitems (info->hgroups)) {
01365         regexpverberrorwithnumber (regexpnonexistantgroupnumbererror, ix+1, bserror);
01366         return (false);
01367         }
01368 
01369     if (!getnthlistval (info->hgroups, ref, nil, &val))
01370         return (false);
01371     
01372     if (!coercetostring (&val))
01373         return (false);
01374     
01375     if (!writehandlestreamhandle (&(info->s), val.data.stringvalue))
01376         return (false);
01377     
01378     releaseheaptmp (val.data.stringvalue);
01379     
01380     return (true);
01381     } /*replscanwritenumbered*/
01382 
01383 
01384 static boolean replscanwritenamed (int ix, int len, const char *cptr, int clen, bigstring bserror, void *refcon) {
01385 #pragma unused (ix, len, bserror)
01386 
01387     tyreplscanexpandinfo *info = (tyreplscanexpandinfo *) refcon;
01388     hdlhashnode hnode;
01389     hdlhashtable ht;
01390     tyvaluerecord vname, vstr;
01391     bigstring bsname;
01392     boolean fl;
01393     
01394     moveleft ((void *) cptr, stringbaseaddress (bsname), clen);
01395     setstringlength (bsname, clen);
01396 
01397     if (!langhashtablelookup (info->htnames, bsname, &vname, &hnode))
01398         return (false); 
01399 
01400     if (!langexternalvaltotable (vname, &ht, hnode)) {
01401         return (false);
01402         }
01403 
01404     if (!langhashtablelookup (ht, STR_matchString, &vstr, &hnode))
01405         return (false);
01406         
01407     if (vstr.valuetype != stringvaluetype)
01408         if (!copyvaluerecord (vstr, &vstr) || !coercetostring (&vstr))
01409             return (false);
01410 
01411     fl = writehandlestreamhandle (&(info->s), vstr.data.stringvalue);
01412 
01413     if (vstr.fltmpstack)
01414         releaseheaptmp (vstr.data.stringvalue);
01415     
01416     return (fl);
01417     } /*replscanwritenamed*/
01418 
01419 
01420 static boolean regexpcheckliteral (int ix, int len, bigstring bserror, void *refcon) {
01421 #pragma unused (ix, len, bserror, refcon)
01422 
01423     /*nothing to check*/
01424     
01425     return (true);
01426     } /*regexpcheckliteral*/
01427 
01428 
01429 static boolean regexpchecknumbered (int ix, int len, int ref, bigstring bserror, void *refcon) {
01430 #pragma unused (len)
01431 
01432     Handle hcp = (Handle) refcon;
01433                         
01434     if (ref > getcapturecount (hcp)) {
01435         regexperrorwithnumber (frnonexistantgroupnumbererror, ix+1, bserror);
01436         return (false);
01437         }
01438     
01439     return (true);
01440     } /*regexpchecknumbered*/
01441 
01442 
01443 static boolean regexpchecknamed (int ix, int len, const char *cptr, int clen, bigstring bserror, void *refcon) {
01444 #pragma unused (len)
01445 
01446     Handle hcp = (Handle) refcon;
01447     int ref;
01448     
01449     ref = regexpstringnumberfrompattern (cptr, clen, hcp);
01450                         
01451     if (ref <= 0) {
01452         if (ref == PCRE_ERROR_NOSUBSTRING)
01453             regexperrorwithnumber (frnonexistantgroupnameerror, ix+1, bserror);
01454         else
01455             regexperrorwithnumber (frinternalerror, ref, bserror);
01456         return (false);
01457         }
01458     
01459     return (true);
01460     } /*regexpchecknamed*/
01461     
01462 
01463 static boolean regexptextsearchwriteliteral (int ix, int len, bigstring bserror, void *refcon) {
01464 #pragma unused (bserror)
01465 
01466     tyregexpsearchinfo *info = (tyregexpsearchinfo *) refcon;
01467     
01468     return (writehandlestream (&(info->s), stringbaseaddress (searchparams.bsreplace) + ix, len));
01469     } /*regexptextsearchwriteliteral*/
01470 
01471 
01472 static boolean regexptextsearchwritenumbered (int ix, int len, int ref, bigstring bserror, void *refcon) {
01473 #pragma unused (len, ix, bserror)
01474 
01475     tyregexpsearchinfo *info = (tyregexpsearchinfo *) refcon;
01476 
01477     return (writehandlestream (&(info->s), info->p + getgroupoffset (info->hovector, ref), getgrouplength (info->hovector, ref)));
01478     } /*regexptextsearchwritenumbered*/
01479 
01480 
01481 static boolean regexptextsearchwritenamed (int ix, int len, const char *cptr, int clen, bigstring bserror, void *refcon) {
01482 #pragma unused (ix, len, bserror)
01483 
01484     tyregexpsearchinfo *info = (tyregexpsearchinfo *) refcon;
01485     int ref;
01486     
01487     ref = regexpstringnumberfrompattern (cptr, clen, info->hcp);
01488                         
01489     if (ref <= 0) {
01490         return (false);
01491         }
01492 
01493     return (writehandlestream (&(info->s), info->p + getgroupoffset (info->hovector, ref), getgrouplength (info->hovector, ref)));
01494     } /*regexptextsearchwritenamed*/
01495 
01496 
01497 static void replscanerror (int pos, bigstring bsmsg) {
01498     
01499     regexpverberrorwithnumber (regexpbadgroupnameerror, pos, bsmsg);
01500     } /*replscanerror*/
01501 
01502 
01503 static void regexpcheckerror (int pos, bigstring bsmsg) {
01504     
01505     regexperrorwithnumber (frbadgroupnameerror, pos, bsmsg);
01506     } /*regexpcheckerror*/
01507     
01508 
01509 static boolean regexpscanreplacement (const char *pstart, long len, bigstring bserror,
01510                                         tyreplscanliteralcallback literalfunc,
01511                                         tyreplscannumberedcallback numberedfunc,
01512                                         tyreplscannamedcallback namedfunc,
01513                                         tyreplscanbadgroupnamecallback errorfunc,
01514                                         void *refcon) {
01515 
01516     /*
01517     Syntax for references in replacement string:
01518     
01519     \nn where nn is a number, possibly consisting of multiple digits, refers to a numbered substring.
01520     
01521     \g<nn> where nn is a number, possibly consisting of multiple digits, refers to a numbered substring.
01522     
01523     \g<nn> where nn is an alpha string, refers to a named substring.
01524     
01525     This follows the syntax used by the sub method in the Python re module.
01526     */
01527     
01528     const char *p = pstart;
01529     const char *plast = p;
01530     const char *pend = p + len;
01531     
01532     while (p < pend - 1) { /*leave trailing backslash alone*/
01533         
01534         if (*p == '\\') {
01535             
01536             const char *p1 = p + 1; /*still in handle due to break-off condition*/
01537             
01538             if (*p1 == 'g') {
01539                 
01540                 p1++;
01541                 
01542                 if (p1 < pend && *p1 == '<') {
01543                     
01544                     const char *p2 = ++p1;
01545                     
01546                     if (p1 == pend) {
01547                         errorfunc (p2 - pstart, bserror);
01548                         return (false);
01549                         }
01550                     
01551                     if (isdigit(*p1)) { /*should be a numbered group*/
01552                     
01553                         long ref = 0;
01554                         
01555                         while (p1 < pend && isdigit (*p1)) {
01556                             
01557                             ref = 10 * ref + (*p1 - '0');
01558                             
01559                             p1++;
01560                             }
01561                         
01562                         if (p1 == pend || *p1 != '>') {
01563                             errorfunc (p2 - pstart + 1, bserror);
01564                             return (false);
01565                             }
01566                     
01567                         if (!literalfunc (plast - pstart, p - plast, bserror, refcon))
01568                             return (false);
01569                         
01570                         if (!numberedfunc (p - pstart, p1 - p, ref, bserror, refcon))
01571                             return (false);
01572                         
01573                         }
01574                     else { /*might be a named group*/
01575 
01576                         while (p1 < pend && *p1 != '>')
01577                             p1++;
01578                         
01579                         if (p1 == pend || p1 == p2) { /*unterminated or empty group name*/
01580                             errorfunc (p2 - pstart + 1, bserror);
01581                             return (false);             
01582                             }
01583                         
01584                         if (!literalfunc (plast - pstart, p - plast, bserror, refcon))
01585                             return (false);
01586                         
01587                         if (!namedfunc (p - pstart, p1 - p, p2, p1 - p2, bserror, refcon))
01588                             return (false);
01589                         }
01590                     
01591                     p = plast = ++p1; /*point past closing angle bracket*/
01592                     
01593                     continue; /*avoid incrementation of x*/
01594                     }
01595                 else {
01596                     errorfunc (p1 - pstart, bserror);               
01597                     return (false);             
01598                     }
01599                 }
01600             else if (*p1 == '\\') {
01601                 /* 2004/12/11 smd: double backslashes should be collapsed into singles */
01602                 
01603                 if (!literalfunc (plast - pstart, (p - plast) + 1, bserror, refcon))
01604                     return (false);
01605                 
01606                 p = plast = ++p1;
01607                 
01608                 continue; /*avoid incrementation of p*/             
01609                 }
01610             else if (isdigit (*p1)) {
01611                 
01612                 long ref = 0;
01613                 
01614                 while (p1 < pend && isdigit (*p1)) {
01615                     
01616                     ref = 10 * ref + (*p1 - '0');
01617                     
01618                     p1++;
01619                     }
01620                 
01621                 if (!literalfunc (plast - pstart, p - plast, bserror, refcon))
01622                     return (false);
01623                 
01624                 if (!numberedfunc (p - pstart, p1 - p, ref, bserror, refcon))
01625                     return (false);
01626                 
01627                 p = plast = p1;
01628                 
01629                 continue; /*avoid incrementation of x*/             
01630                 }
01631             else {
01632                 p++; /*step over following char*/
01633                 }
01634             }
01635         
01636         p++;
01637         }/*while*/
01638     
01639     /* push remaining literal if any */
01640     
01641     if (plast < pend)
01642         if (!literalfunc (plast - pstart, pend - plast, bserror, refcon))
01643             return (false);
01644     
01645     return (true);
01646     } /*regexpscanreplacement*/
01647 
01648 
01649 static boolean regexpcompilereplacement (Handle hcp, char *p, long len, bigstring bserror, hdlreplacepart *hreplaceparts) {
01650 
01651     tyreplscancompileinfo info;
01652     
01653     /* scan repl parameter */
01654     
01655     info.hcp = hcp;
01656 
01657     openhandlestream (nil, &info.s);
01658 
01659     if (!regexpscanreplacement (p, len, bserror,
01660                                     &replscancompileliteral, &replscancompilenumbered, &replscancompilenamed,
01661                                     &replscanerror,
01662                                     (void *) &info)) {
01663         disposehandlestream (&info.s);
01664         return (false);
01665         }
01666 
01667     if (!writehandlestreamreplpart (&info.s, REGEXP_TERMINATOR, 0, 0, 0)) {
01668         disposehandlestream (&info.s);
01669         return (false);
01670         }
01671     
01672     *hreplaceparts = (hdlreplacepart) closehandlestream (&info.s);
01673 
01674     return (*hreplaceparts != nil);
01675     } /*regexpcompilereplacement*/
01676 
01677 
01678 static boolean writereplacementhandlestream (handlestream *s, Handle hsubject, Handle hreplace, hdlreplacepart hreplaceparts, Handle hovec) {
01679 
01680     long k = 0;
01681     
01682     assert (hreplaceparts != nil);
01683     
01684     while (true) {
01685     
01686         switch ((*hreplaceparts)[k].type) {
01687         
01688             case REGEXP_LITERAL:
01689             {
01690                 if (!writehandlestreamhandlepart (s, hreplace, (*hreplaceparts)[k].ix, (*hreplaceparts)[k].len))
01691                     return (false);
01692 
01693                 break;
01694             }
01695             
01696             case REGEXP_NUMBERED:
01697             case REGEXP_NAMED:
01698             {
01699                 int ref = (*hreplaceparts)[k].ref;
01700                 
01701                 if (!writehandlestreamhandlepart (s, hsubject, getgroupoffset (hovec, ref), getgrouplength (hovec, ref)))
01702                     return (false);
01703 
01704                 break;
01705             }
01706                 
01707             default:
01708                 return (true);
01709         
01710             } /*switch*/
01711     
01712         k++;
01713         
01714         } /*while*/
01715 
01716     return (true);
01717     } /*writereplacementhandlestream*/
01718 
01719 
01720 static boolean getcodetreefromscriptaddress (hdlhashtable htable, bigstring bsverb, hdltreenode *hcode) {
01721     
01722     /*
01723     Given the address of an object, assume it's a script or code object and get its code tree
01724     
01725     Code cribbed from langrunscript in lang.c
01726     */
01727     
01728     tyvaluerecord vhandler;
01729     hdlhashnode handlernode;
01730     
01731     if (!hashtablelookupnode (htable, bsverb, &handlernode)) {
01732         
01733         langparamerror (unknownfunctionerror, bsverb);
01734         
01735         return (false);
01736         }
01737     
01738     vhandler = (**handlernode).val;
01739     
01740     /*build a code tree and call the handler, with our error hook in place*/
01741     
01742     *hcode = nil;
01743     
01744     if (vhandler.valuetype == codevaluetype) {
01745 
01746         *hcode = vhandler.data.codevalue;
01747     }
01748     else if ((**htable).valueroutine == nil) { /*not a kernel table*/
01749         
01750         if (!langexternalvaltocode (vhandler, hcode)) {
01751 
01752             langparamerror (notfunctionerror, bsverb);
01753 
01754             return (false);
01755             }
01756         
01757         if (*hcode == nil) { /*needs compilation*/
01758             
01759             if (!langcompilescript (handlernode, hcode))
01760                 return (false);
01761             }
01762         }
01763     
01764     return (true);
01765     } /*getcodetreefromscriptaddress*/
01766 
01767 
01768 static boolean regexprunreplacecallback (tyaddress *adrcallback, tyvaluerecord *vrepl, tyvaluerecord *vmatchinfo, Handle *hrepl, boolean *flcallbackresult) {
01769     
01770     hdlhashtable htlocals = nil;
01771     hdllistrecord hparams;
01772     tyvaluerecord vparams, vresult, val;
01773     hdltreenode hcode;
01774     hdlhashnode hdummy;
01775     boolean fl;
01776     
01777     *hrepl = nil;
01778     
01779     /* set up table of local variables */
01780     
01781     exemptfromtmpstack (vrepl);
01782     
01783     exemptfromtmpstack (vmatchinfo);
01784     
01785     fl = langpushlocalchain (&htlocals);
01786     
01787     if (fl)
01788         fl = hashtableassign (htlocals, STR_replacementString, *vrepl);
01789     
01790     if (!fl)
01791         disposevaluerecord (*vrepl, false);
01792     
01793     if (fl)
01794         fl = hashtableassign (htlocals, STR_matchInfo, *vmatchinfo);
01795     
01796     if (!fl)
01797         disposevaluerecord (*vmatchinfo, false);
01798     
01799     /* build list of parameters */
01800     
01801     if (fl)
01802         fl = opnewlist (&hparams, false);
01803     
01804     if (fl)
01805         fl = setheapvalue ((Handle) hparams, listvaluetype, &vparams);
01806     
01807     if (fl)
01808         fl = langpushlistaddress (hparams, htlocals, STR_matchInfo);
01809     
01810     if (fl)
01811         fl = langpushlistaddress (hparams, htlocals, STR_replacementString);
01812         
01813     /* obtain code tree for callback script */
01814     
01815     if (fl)
01816         fl = getcodetreefromscriptaddress ((*adrcallback).ht, (*adrcallback).bs, &hcode);
01817     
01818     /* run callback script */
01819     
01820     if (fl)
01821         fl = langrunscriptcode ((*adrcallback).ht, (*adrcallback).bs, hcode, &vparams, nil, &vresult);
01822     
01823     if (fl)
01824         fl = coercetoboolean (&vresult);
01825 
01826     if (fl)
01827         *flcallbackresult = vresult.data.flvalue;
01828     
01829     /* get replacement string from locals table */
01830     
01831     if (fl)
01832         fl = langhashtablelookup (htlocals, STR_replacementString, &val, &hdummy);
01833     
01834     if (fl)
01835         fl = copyvaluerecord (val, &val);
01836     
01837     if (fl)
01838         if (val.valuetype != stringvaluetype)
01839             fl = coercetostring (&val);
01840     
01841     if (fl) {
01842         exemptfromtmpstack (&val);
01843         *hrepl = val.data.stringvalue;
01844         }
01845     
01846     /* dispose table for local variables */
01847     
01848     return (langpoplocalchain (htlocals) && fl);
01849     } /*regexprunreplacecallback*/
01850 
01851 
01852 static boolean regexpreplacematch (handlestream *s, Handle hsubject, Handle hreplace, hdlreplacepart hreplaceparts,
01853                                     Handle hcp, Handle hovec, tyaddress *adrcallback,
01854                                     bigstring bserror, boolean *flresult) {
01855 
01856     hdlhashtable htmatchinfo = nil;
01857     tyvaluerecord vmatchinfo;
01858     handlestream hsrepl;
01859     tyvaluerecord vrepl;
01860     Handle hrepl;
01861     boolean fl;
01862     
01863     *flresult = true; /*default return value*/
01864     
01865     /* if adrcallback was not specified just perform the replacement and return */
01866     
01867     if ((*adrcallback).ht == nil && isemptystring ((*adrcallback).bs))
01868         return (writereplacementhandlestream (s, hsubject, hreplace, hreplaceparts, hovec));
01869     
01870     /* build default replacement string */
01871     
01872     openhandlestream (nil, &hsrepl);
01873     
01874     if (!writereplacementhandlestream (&hsrepl, hsubject, hreplace, hreplaceparts, hovec))
01875         return (false);
01876     
01877     hrepl = closehandlestream (&hsrepl);
01878     
01879     if (hrepl == nil) {
01880         if (!newemptyhandle (&hrepl))
01881             return (false);
01882         }
01883     
01884     if (!setheapvalue (hrepl, stringvaluetype, &vrepl))
01885         return (false);
01886     
01887     /* build matchInfo table */
01888     
01889     if (!tablenewtablevalue (&htmatchinfo, &vmatchinfo))
01890         return (false);
01891     
01892     if (!pushtmpstackvalue (&vmatchinfo))
01893         return (false);
01894     
01895     if (!regexpbuildmatchinfotable (hsubject, hcp, hovec, true, true, bserror, htmatchinfo))
01896         return (false);
01897     
01898     /* run callback script */
01899     
01900     if (!regexprunreplacecallback (adrcallback, &vrepl, &vmatchinfo, &hrepl, flresult)) {
01901         disposehandle (hrepl);
01902         return (false);
01903         }
01904 
01905     if (*flresult)
01906         fl = writehandlestreamhandle (s, hrepl);
01907     else
01908         fl = writehandlestreamhandlepart (s, hsubject, getgroupoffset (hovec, 0), getgrouplength (hovec, 0));
01909 
01910     disposehandle (hrepl);
01911     
01912     return (fl);
01913     } /*regexpreplacematch*/
01914 
01915 
01916 static boolean pushhandlepartonlist (hdllistrecord hlist, Handle hsrc, long offset, long length) {
01917 
01918     Handle h;
01919 
01920     if (!loadfromhandletohandle (hsrc, &offset, length, false, &h))
01921         return (false);
01922 
01923     return (langpushlisttext (hlist, h));
01924     } /*pushhandlepartonlist*/
01925 
01926 
01927 static boolean regexppushonematchonlist (Handle hsubject, Handle hovec, long ixgroup, hdllistrecord hlist) {
01928 
01929     return (pushhandlepartonlist (hlist, hsubject, getgroupoffset (hovec, ixgroup), getgrouplength (hovec, ixgroup)));
01930     } /*regexppushonematchonlist*/
01931 
01932 
01933 static boolean regexpextractmatch (Handle hsubject, Handle hovec, int **hgroups, long ctgroups, hdllistrecord hresult) {
01934 
01935     boolean fl;
01936     
01937     if (ctgroups == 1) {
01938         
01939         /* add string item to result list */
01940         
01941         fl = regexppushonematchonlist (hsubject, hovec, (*hgroups)[0], hresult);
01942         }
01943     else {
01944     
01945         /* add sublist of strings to result list */
01946 
01947         hdllistrecord hsublist = nil;
01948         tyvaluerecord vtemp;
01949         long k;
01950         
01951         if (!opnewlist (&hsublist, false))
01952             return (false);
01953         
01954         for (k = 0; k < ctgroups; k++) {
01955             
01956             if (!regexppushonematchonlist (hsubject, hovec, (*hgroups)[k], hsublist)) {
01957                 opdisposelist (hsublist);
01958                 return (false);
01959                 }
01960             } /*for*/
01961         
01962         initvalue (&vtemp, listvaluetype);
01963         
01964         vtemp.data.binaryvalue = (Handle) hsublist;
01965         
01966         fl = langpushlistval (hresult, nil, &vtemp);
01967         
01968         opdisposelist (hsublist);
01969         }
01970 
01971     return (fl);
01972     } /*regexpextractmatch*/
01973 
01974 
01975 static boolean regexpwritelistitemscallback (tyvaluerecord *v, ptrvoid refcon) {
01976     
01977     tywritelistinfo *info = (tywritelistinfo *) refcon;
01978     
01979     if (!coercetostring (v))
01980         return (false);
01981     
01982     if (!writehandlestreamhandle ((*info).s, (*v).data.stringvalue))
01983         return (false);
01984     
01985     if (!writehandlestreamhandle ((*info).s, (*info).hglue))
01986         return (false);
01987 
01988     return (true);
01989     } /*regexpwritelistitemscallback*/
01990 
01991 
01992 static boolean regexprunvisitcallback (Handle hsubject, Handle hcp, Handle hovec,
01993                                         boolean flmakegroups, boolean flmakenamedgroups, tyaddress *adrcallback,
01994                                         bigstring bserror, tyvaluerecord *vresult) {
01995     
01996     hdlhashtable htlocals = nil;
01997     hdlhashtable htmatchinfo;
01998     hdllistrecord hparams;
01999     tyvaluerecord vparams;
02000     hdltreenode hcode;
02001     boolean fl = true;
02002         
02003     /* obtain code tree for callback script */
02004     
02005     if (!getcodetreefromscriptaddress ((*adrcallback).ht, (*adrcallback).bs, &hcode))
02006         return (false);
02007     
02008     /* set up table for local variables */
02009     
02010     if (!langpushlocalchain (&htlocals))
02011         return (false);
02012         
02013     /* build match info table */
02014     
02015     fl = langassignnewtablevalue (htlocals, STR_matchInfo, &htmatchinfo);
02016     
02017     if (fl)
02018         fl = regexpbuildmatchinfotable (hsubject, hcp, hovec, flmakegroups, flmakenamedgroups, bserror, htmatchinfo);
02019     
02020     /* build list of parameters */
02021     
02022     if (fl)
02023         fl = opnewlist (&hparams, false);
02024     
02025     if (fl)
02026         fl = setheapvalue ((Handle) hparams, listvaluetype, &vparams);
02027     
02028     if (fl)
02029         fl = langpushlistaddress (hparams, htlocals, STR_matchInfo);
02030     
02031     /* run callback script */
02032     
02033     if (fl)
02034         fl = langrunscriptcode ((*adrcallback).ht, (*adrcallback).bs, hcode, &vparams, nil, vresult);
02035     
02036     /* dispose table for local variables */
02037     
02038     fl = langpoplocalchain (htlocals) && fl;
02039 
02040     return (fl);
02041     } /*regexprunvisitcallback*/
02042 
02043 
02044 static boolean regexpgreplistitemscallback (tyvaluerecord *v, ptrvoid refcon) {
02045     
02046     tygreplistinfo *info = (tygreplistinfo *) refcon;
02047     int res;
02048     
02049     if (!coercetostring (v))
02050         return (false);
02051     
02052     res = regexpexechandle ((*info).hcp, (*v).data.stringvalue, 0, longinfinity, (*info).hovec, (*info).bserrorptr);
02053     
02054     if (res <= 0 && res != PCRE_ERROR_NOMATCH)
02055         return (false);
02056     
02057     if ((*info).flincludematches ? (res > 0) : (res == PCRE_ERROR_NOMATCH))
02058         if (!langpushlistval ((*info).hresult, nil, v))
02059             return (false);
02060     
02061     return (true);
02062     } /*regexpgreplistitemscallback*/
02063 
02064 
02065 #ifdef MACVERSION
02066 #pragma mark === public functions ===
02067 #endif
02068 
02069 
02070 boolean regexpcompile (const char *patternstr, int options, bigstring bserror, Handle *hcp) {
02071     
02072     /*
02073     Compile a null-terminated string as a regular expression
02074     */
02075 
02076     pcre *patternref = nil;
02077     pcre_extra *extraref = nil;
02078     const char *errmsg;
02079     int errpos;
02080     tycompiledpattern rec;
02081     Handle h;
02082     real_pcre *pat;
02083     int capturecount, res;
02084     boolean flsuccess = false;
02085     
02086     /* compile pattern */
02087     
02088     patternref = pcre_compile (patternstr, options & PUBLIC_OPTIONS, &errmsg, &errpos, chartableptr);
02089     
02090     if (patternref == nil) {
02091         regexpcompilationerror (errmsg, errpos, bserror);
02092         goto exit;
02093         }
02094         
02095     /* study compiled pattern */
02096     
02097     extraref = pcre_study (patternref, options & PUBLIC_STUDY_OPTIONS, &errmsg);
02098     
02099     if (errmsg != nil) {
02100         regexpcompilationerror (errmsg, 0, bserror);
02101         goto exit;
02102         }
02103 
02104     /* determine number of captured substrings in pattern */
02105     
02106     res = pcre_fullinfo (patternref, extraref, PCRE_INFO_CAPTURECOUNT, &capturecount);
02107     
02108     if (res != 0) {
02109         regexpverberrorwithnumber (regexpinternalerror, res, bserror);
02110         goto exit;
02111         }
02112     
02113     /* nuke tables ptr in real_pcre struct */
02114     
02115     pat = (real_pcre *) patternref;
02116     
02117     pat->tables = nil;
02118     
02119     /* fill in compiled pattern struct*/
02120     
02121     rec.type = MAGIC_NUMBER;
02122     rec.systemid = SYSTEM_ID;
02123     rec.version = CURRENT_VERSION;
02124     rec.pcreversionmajor = PCRE_MAJOR;
02125     rec.pcreversionminor = PCRE_MINOR;
02126     
02127     memtodisklong (rec.type);
02128     memtodiskshort (rec.systemid);
02129     memtodiskshort (rec.version);           /* don't need to byte-swap anything else because we */
02130     memtodiskshort (rec.pcreversionmajor);  /* reject the pattern if it wasn't compiled on the  */
02131     memtodiskshort (rec.pcreversionminor);  /* same platfrom and with the same PCRE version     */
02132 
02133     rec.options = options;
02134 
02135     rec.capturecount = capturecount;
02136 
02137     rec.ovecsize = max (30, 3 * (capturecount + 1));
02138 
02139     rec.pattern_size    = pat->size;
02140     rec.pattern_offset  = sizeof (tycompiledpattern);
02141 
02142     if (extraref != nil) {
02143         if ((extraref->flags & PCRE_EXTRA_STUDY_DATA) != 0 && extraref->study_data != nil) {
02144             rec.study_size = ((pcre_study_data *) (extraref->study_data)) -> size;
02145             rec.study_offset = rec.pattern_offset + rec.pattern_size;
02146             }
02147         }
02148     else {
02149         rec.study_size = 0;
02150         rec.study_offset = 0;
02151         }
02152 
02153     /* allocate handle */
02154     
02155     if (!newclearhandle (sizeof (tycompiledpattern) + rec.pattern_size + rec.study_size, &h))
02156         goto exit;
02157     
02158     /* move data into handle */
02159     
02160     moveleft (&rec, *h, sizeof (rec));
02161     
02162     if (rec.pattern_size > 0)
02163         moveleft (patternref, *h + rec.pattern_offset, rec.pattern_size);
02164     
02165     if (rec.study_size > 0)
02166         moveleft (extraref->study_data, *h + rec.study_offset, rec.study_size);
02167 
02168     *hcp = h;
02169     
02170     flsuccess = true;
02171     
02172 exit:
02173     
02174     if (patternref != nil)
02175         pcre_free (patternref);
02176     
02177     if (extraref != nil)
02178         pcre_free (extraref);
02179         
02180     return (flsuccess);
02181     } /*regexpcompile*/
02182 
02183 
02184 boolean regexpcheckreplacement (Handle hcp, const char *p, int len) {
02185 
02186     /*
02187     Just check whether the replacement string contains a syntax error,
02188     but don't actually build any in-memory structures.
02189     */
02190     
02191     bigstring bsmsg;
02192     boolean fl;
02193     
02194     assert (hcp != nil && *hcp != nil);
02195     
02196     assert (conditionallongswap ((**((tycompiledpattern**)hcp)).type) == MAGIC_NUMBER);
02197     
02198     setemptystring (bsmsg);
02199     
02200     fl = regexpscanreplacement (p, len,
02201                         bsmsg,
02202                         &regexpcheckliteral,
02203                         &regexpchecknumbered,
02204                         &regexpchecknamed,
02205                         &regexpcheckerror,
02206                         (void *) hcp);
02207 
02208     if (!isemptystring (bsmsg))
02209         langerrormessage (bsmsg);
02210     
02211     return (fl);
02212     } /*regexpcheckreplacement*/
02213 
02214 
02215 boolean regexpnewovector (Handle hcp, Handle *hovec) {
02216     return (newclearhandle (sizeof (int) * getovectorsize (hcp), hovec));
02217     } /*regexpnewovector*/
02218 
02219 
02220 boolean regexptextsearch (byte *ptext, long lentext, long *offset, long *lenmatch) {
02221 
02222     /*
02223     Called from textsearch (which it mimicks) in search.c
02224     to support regular expressions in Find & Replace dialogs.
02225     
02226     If we find a match, set up the replacement string right here,
02227     but leave inserting it up to the caller.
02228     
02229     Return value indicates whether we found a match or not.
02230     
02231     Don't throw langerrors.
02232     */
02233 
02234     Handle hcp = searchparams.hcompiledpattern;
02235     Handle hovector = searchparams.hovector;
02236     long ixstart = *offset;
02237     pcre_extra extra;
02238     int res;
02239     bigstring bsdummy;
02240     boolean fl;
02241     
02242     assert (hcp != nil && *hcp != nil);
02243     
02244     assert (conditionallongswap ((**((tycompiledpattern**)hcp)).type) == MAGIC_NUMBER);
02245         
02246     initpcreextra (hcp, &extra);
02247     
02248     while (true) {
02249         
02250         res = pcre_exec (getpatternref (hcp), &extra,
02251                             (const char *)ptext, lentext, ixstart, lentext - ixstart,
02252                             getoptions (hcp) & PUBLIC_EXEC_OPTIONS,
02253                             (int *) *hovector, getovectorsize (hcp), chartableptr); 
02254 
02255         if (res <= 0)
02256             return (false);
02257         
02258         if (!searchparams.flwholewords || isword (ptext, lentext, getgroupoffset (hovector, 0), getgrouplength (hovector, 0))) {
02259         
02260             tyregexpsearchinfo info;
02261             
02262             info.p = (char *)ptext;
02263             info.hcp = hcp;
02264             info.hovector = hovector;
02265             openhandlestream (nil, &info.s);
02266             
02267             disablelangerror ();
02268             
02269             fl = regexpscanreplacement (stringbaseaddress ((char *) searchparams.bsorigreplace),
02270                                 stringlength (searchparams.bsorigreplace),
02271                                 bsdummy,
02272                                 &regexptextsearchwriteliteral,
02273                                 &regexptextsearchwritenumbered,
02274                                 &regexptextsearchwritenamed,
02275                                 &replscanerror,
02276                                 (void *) &info);
02277             
02278             enablelangerror ();
02279             
02280             if (!fl) {
02281                 disposehandlestream (&info.s);
02282                 return (false);
02283                 }
02284             
02285             texthandletostring (closehandlestream (&info.s), searchparams.bsreplace);
02286         
02287             *offset = getgroupoffset (hovector, 0);
02288             *lenmatch = getgrouplength (hovector, 0);
02289 
02290             return (true);
02291             }
02292         
02293         ixstart = getgroupoffset (hcp, 0) + 1; /*continue after beginning of false match*/
02294         }
02295     } /*regexptextsearch*/
02296 
02297 
02298 #ifdef MACVERSION
02299 #pragma mark === kernel verbs ===
02300 #endif
02301 
02302 
02303 static boolean regexpcompileverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
02304     
02305     /*
02306     compile (pattern, flCaseSensitive = true) //opaque: pattern reference
02307     */
02308     
02309     Handle hpattern;
02310     tyvaluerecord vtemp;
02311     int options = 0;
02312     short ctconsumed = 1;
02313     short ctpositional = 1;
02314     char chterminate = chnul;
02315     Handle hcp;
02316     boolean fl;
02317         
02318     /* get pattern param */
02319     
02320     if (!gettextvalue (hp1, 1, &hpattern))
02321         return (false);
02322 
02323     /* get flCaseSensitive param */
02324     
02325     setbooleanvalue (false, &vtemp); /* defaults to false */
02326 
02327     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flCaseSensitive, &vtemp)) 
02328         return (false);
02329     
02330     options |= (vtemp.data.flvalue ? 0L : PCRE_CASELESS);
02331 
02332     /* get flDotMatchesAll param */
02333     
02334     setbooleanvalue (true, &vtemp); /* defaults to true */
02335 
02336     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flDotMatchesAll, &vtemp)) 
02337         return (false);
02338     
02339     options |= (vtemp.data.flvalue ? PCRE_DOTALL : 0L);
02340 
02341     /* get flMultiLine param */
02342     
02343     setbooleanvalue (true, &vtemp); /* defaults to true */
02344 
02345     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flMultiLine, &vtemp)) 
02346         return (false);
02347     
02348     options |= (vtemp.data.flvalue ? PCRE_MULTILINE : 0L);
02349 
02350     /* get flAutoCapture param */
02351     
02352     setbooleanvalue (true, &vtemp); /* defaults to true */
02353 
02354     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flAutoCapture, &vtemp)) 
02355         return (false);
02356     
02357     options |= (vtemp.data.flvalue ? 0 : PCRE_NO_AUTO_CAPTURE);
02358 
02359     /* get flGreedyQuantifiers param */
02360     
02361     setbooleanvalue (true, &vtemp); /* defaults to true */
02362 
02363     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flGreedyQuantifiers, &vtemp)) 
02364         return (false);
02365     
02366     options |= (vtemp.data.flvalue ? 0 : PCRE_UNGREEDY);
02367 
02368     /* get flMatchEmptyString param */
02369     
02370     setbooleanvalue (true, &vtemp); /* defaults to true */
02371 
02372     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flMatchEmptyString, &vtemp)) 
02373         return (false);
02374     
02375     options |= (vtemp.data.flvalue ? 0 : PCRE_NOTEMPTY);
02376 
02377     /* get flExtendedMode param */
02378     
02379     flnextparamislast = true;
02380     
02381     setbooleanvalue (false, &vtemp); /* defaults to false */
02382 
02383     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flExtendedMode, &vtemp)) 
02384         return (false);
02385     
02386     options |= (vtemp.data.flvalue ? PCRE_EXTENDED : 0);
02387     
02388     /* prepare params */
02389     
02390     if (!enlargehandle (hpattern, 1, &chterminate)) /* null-terminate pattern */
02391         return (false);
02392     
02393     /* compile pattern */
02394     
02395     lockhandle (hpattern);
02396     
02397     fl = regexpcompile (*hpattern, options, bserror, &hcp);
02398     
02399     unlockhandle (hpattern);
02400     
02401     return (fl && setheapvalue (hcp, binaryvaluetype, v));
02402     } /*regexpcompileverb*/
02403 
02404 
02405 static boolean regexpmatchverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
02406 
02407     /*
02408     match (patternRef, s, adrMatchInfoTable=nil, ix = 1, ct = infinity, flMakeGroups=false)
02409          //number: position of match or 0
02410     */
02411 
02412     Handle hcp;
02413     Handle hovec;
02414     Handle hsubject;
02415     long ixstart;
02416     unsigned long matchlength;
02417     hdlhashtable hmatchinfotable;
02418     boolean flmakegroups;
02419     boolean flmakenamedgroups;
02420     tyvaluerecord vtemp;
02421     short ctconsumed = 2;
02422     short ctpositional = 2;
02423     long res;
02424     
02425     /* get patternRef parameter */
02426     
02427     if (!regexpgetpatternvalue (hp1, 1, true, bserror, &hcp, &hovec))
02428         return (false);
02429         
02430     /* get s parameter */
02431     
02432     if (!getreadonlytextvalue (hp1, 2, &hsubject))
02433         return (false);
02434     
02435     /* get ix parameter */
02436     
02437     setlongvalue (1, &vtemp); /* defaults to 1 */
02438 
02439     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_ix, &vtemp)) 
02440         return (false);
02441     
02442     ixstart = vtemp.data.longvalue - 1; /* convert from 1-based to 0-based*/
02443     
02444     /* get ct parameter */
02445     
02446     setlongvalue (longinfinity, &vtemp); /* defaults to 0x7fffffff */
02447 
02448     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_ct, &vtemp)) 
02449         return (false);
02450     
02451     matchlength = (unsigned long) vtemp.data.longvalue;
02452     
02453     /* get adrMatchInfoTable parameter */
02454     
02455     hmatchinfotable = nil;
02456     
02457     if (!getoptionaltableparam (hp1, &ctconsumed, &ctpositional, STR_adrMatchInfoTable, &hmatchinfotable))
02458         return (false);
02459         
02460     /* get flMakeGroups parameter */
02461     
02462     setbooleanvalue (false, &vtemp); /* defaults to false */
02463 
02464     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flMakeGroups, &vtemp)) 
02465         return (false);
02466     
02467     flmakegroups = vtemp.data.flvalue;
02468         
02469     /* get flmakenamedgroups parameter */
02470 
02471     flnextparamislast = true;
02472     
02473     setbooleanvalue (false, &vtemp); /* defaults to false */
02474 
02475     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flMakeNamedGroups, &vtemp)) 
02476         return (false);
02477     
02478     flmakenamedgroups = vtemp.data.flvalue;
02479 
02480     /* execute compiled pattern */
02481     
02482     res = regexpexechandle (hcp, hsubject, ixstart, matchlength, hovec, bserror);
02483 
02484     /* if it did not match, we are done! */
02485         
02486     if (res == PCRE_ERROR_NOMATCH) {
02487     
02488         releaseheaptmp (hovec); /*no need to accumulate these on the tmp stack*/    
02489 
02490         setlongvalue (0, v);
02491         
02492         return (true);
02493         }
02494 
02495     /* catch execution errors, presumably indicative of a bug in the PCRE library */
02496     
02497     if (res <= 0)
02498         return (false);
02499     
02500     /* build match info table */
02501     
02502     if (hmatchinfotable != nil)
02503         if (!regexpbuildmatchinfotable (hsubject, hcp, hovec, flmakegroups, flmakenamedgroups, bserror, hmatchinfotable))
02504             return (false);
02505     
02506     /* return match position */
02507     
02508     setlongvalue (getgroupoffset (hovec, 0) + 1, v);
02509     
02510     releaseheaptmp (hovec); /*no need to accumulate these on the tmp stack*/    
02511     
02512     return (true);
02513     } /*regexpmatchverb*/
02514 
02515 
02516 static boolean regexpreplaceverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
02517 
02518     /*
02519     replace (patternRef, repl, s, maxReplacements = infinity)
02520          //string: modified string
02521     */
02522 
02523     Handle hcp;
02524     Handle hovec = nil;
02525     Handle hreplace;
02526     long replacelength;
02527     Handle hsubject;
02528     long subjectlength;
02529     long maxrepl;
02530     tyaddress adrctrepl;
02531     tyaddress adrcallback;
02532     tyvaluerecord vtemp;
02533     short ctconsumed = 3;
02534     short ctpositional = 3;
02535     hdlreplacepart hreplaceparts = nil;
02536     handlestream hs;
02537     Handle hresult;
02538     int res;
02539     int ix, lastix;
02540     int ctrepl = 0;
02541     boolean fl, flresult;
02542     
02543     clearbytes (&hs, sizeof (hs));
02544     
02545     /* get patternRef parameter */
02546     
02547     if (!regexpgetpatternvalue (hp1, 1, false, bserror, &hcp, &hovec))
02548         goto exit;
02549         
02550     /* get repl parameter */
02551     
02552     if (!getreadonlytextvalue (hp1, 2, &hreplace))
02553         goto exit;
02554     
02555     replacelength = gethandlesize (hreplace);
02556         
02557     /* get s parameter */
02558     
02559     if (!getreadonlytextvalue (hp1, 3, &hsubject))
02560         goto exit;
02561     
02562     subjectlength = gethandlesize (hsubject);
02563 
02564     /* get maxReplacements parameter */
02565     
02566     setlongvalue (longinfinity, &vtemp); /* defaults to 0x7fffffff */
02567 
02568     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_maxReplacements, &vtemp)) 
02569         goto exit;
02570     
02571     maxrepl = vtemp.data.longvalue;
02572 
02573     /* get adrReplacementCount parameter */
02574     
02575     adrctrepl.ht = nil; /*defaults to a nil address*/
02576     setemptystring (adrctrepl.bs);
02577 
02578     if (!getoptionaladdressparam (hp1, &ctconsumed, &ctpositional, STR_adrReplacementCount, &adrctrepl.ht, adrctrepl.bs)) 
02579         goto exit;
02580 
02581     /* get adrCallback parameter */
02582 
02583     flnextparamislast = true;
02584     
02585     adrcallback.ht = nil; /*defaults to a nil address*/
02586     setemptystring (adrcallback.bs);
02587 
02588     if (!getoptionaladdressparam (hp1, &ctconsumed, &ctpositional, STR_adrCallback, &adrcallback.ht, adrcallback.bs)) 
02589         goto exit;
02590 
02591     /* return early if the caller hasn't asked for at least one replacement */
02592     
02593     if (maxrepl <= 0) {
02594         
02595         if (!copyhandle (hsubject, &hresult))
02596             return (false);
02597         
02598         goto done;
02599         }
02600     
02601     /* scan repl parameter */
02602     
02603     lockhandle (hreplace);
02604     
02605     fl = regexpcompilereplacement (hcp, *hreplace, replacelength, bserror, &hreplaceparts);
02606     
02607     unlockhandle (hreplace);
02608     
02609     if (!fl)
02610         goto exit;
02611     
02612     pushtmpstack ((Handle) hreplaceparts);
02613 
02614     /* run engine */
02615         
02616     openhandlestream (nil, &hs);
02617     
02618     lastix = ix = 0;
02619     
02620     while (true) { /*execute loop at least once*/   
02621         
02622         res = regexpexechandle (hcp, hsubject, ix, longinfinity, hovec, bserror);
02623             
02624         /* catch internal errors */
02625         
02626         if (res <= 0 && res != PCRE_ERROR_NOMATCH) 
02627             goto exit;
02628         
02629         /* if it did not match, copy remainder and we are done */
02630         
02631         if (res == PCRE_ERROR_NOMATCH) {
02632             
02633             if (!writehandlestreamhandlepart (&hs, hsubject, lastix, subjectlength - lastix))
02634                 goto exit;
02635             
02636             break;
02637             }
02638         
02639         /* write fragment between last match and current match */
02640         
02641         if (!writehandlestreamhandlepart (&hs, hsubject, lastix, getgroupoffset (hovec, 0) - lastix))
02642             goto exit;
02643 
02644         /* process repl string, including captured substrings */
02645         
02646         if (!regexpreplacematch (&hs, hsubject, hreplace, hreplaceparts, hcp, hovec, &adrcallback, bserror, &flresult))
02647             goto exit;
02648         
02649         /* if the callback returned false or we reached max replacements, copy remainder and we are done */
02650         
02651         if (!flresult || ++ctrepl >= maxrepl) {
02652             
02653             int groupend = getgroupend (hovec, 0);
02654             
02655             if (!writehandlestreamhandlepart (&hs, hsubject, groupend, subjectlength - groupend))
02656                 goto exit;
02657         
02658             break;
02659             }
02660         
02661         /* determine position of next match attempt */
02662         
02663         lastix = ix = getgroupend (hovec, 0);
02664         
02665         if (getgrouplength (hovec, 0) == 0)
02666             ix++; /* prevent infinite loop if we matched the empty string */
02667         } /*while*/ 
02668 
02669     hresult = closehandlestream (&hs);
02670     
02671     if (hresult == nil) {
02672         if (!newemptyhandle (&hresult))
02673             goto exit;
02674         }
02675 
02676 done:
02677     
02678     releaseheaptmp (hovec); /*no need to accumulate these on the tmp stack*/    
02679     
02680     releaseheaptmp ((Handle) hreplaceparts);
02681     
02682     /* set result */
02683 
02684     if (!isemptystring (adrctrepl.bs)) {
02685         
02686         setlongvalue (ctrepl, &vtemp);
02687         
02688         if (!langsetsymboltableval (adrctrepl.ht, adrctrepl.bs, vtemp)) {
02689             goto exit;
02690             }
02691         }
02692     
02693     return (setheapvalue (hresult, stringvaluetype, v));
02694     
02695 exit:
02696 
02697     disposehandlestream (&hs);
02698     
02699     return (false);
02700     } /*regexpreplaceverb*/
02701 
02702 
02703 static boolean regexpextractverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
02704 
02705     /*
02706     extract (patternRef, s, groups = {})
02707          //list: matched strings or group strings
02708     */
02709     Handle hcp;
02710     Handle hovec;
02711     Handle hsubject;
02712     hdllistrecord hgroups;
02713     short ctconsumed = 2;
02714     short ctpositional = 2;
02715     long ctgroups;
02716     int **hgroupsarray;
02717     long ref;
02718     long k;
02719     tyvaluerecord vtemp;
02720     boolean fl;
02721     hdllistrecord hresult;
02722     long ix, lastix, res;
02723     
02724     /* get patternRef parameter */
02725     
02726     if (!regexpgetpatternvalue (hp1, 1, true, bserror, &hcp, &hovec))
02727         return (false);
02728         
02729     /* get s parameter */
02730     
02731     if (!getreadonlytextvalue (hp1, 2, &hsubject))
02732         return (false);
02733     
02734     /* get groups parameter */
02735     
02736     if (!getoptionallistparam (hp1, &ctconsumed, &ctpositional, STR_groups, &hgroups))
02737         return (false);
02738     
02739     /* convert groups list to internal format */
02740     
02741     ctgroups = opcountlistitems (hgroups);
02742     
02743     if (ctgroups == 0) {
02744     
02745         if (!newhandle (sizeof (int), (Handle *) &hgroupsarray))
02746             return (false);
02747         
02748         if (!pushtmpstack ((Handle) hgroupsarray))
02749             return (false);
02750         
02751         (*hgroupsarray)[0] = 0;
02752         
02753         ctgroups = 1;
02754         }
02755     else {
02756     
02757         if (!newclearhandle (ctgroups * sizeof (int), (Handle *) &hgroupsarray))
02758             return (false);
02759         
02760         if (!pushtmpstack ((Handle) hgroupsarray))
02761             return (false);
02762         
02763         for (k = 0; k < ctgroups; k++) {
02764         
02765             if (!getnthlistval (hgroups, k+1, nil, &vtemp))
02766                 return (false);
02767             
02768             disablelangerror ();
02769             
02770             fl = coercetolong (&vtemp);
02771             
02772             enablelangerror ();
02773             
02774             if (fl) {
02775                 ref = vtemp.data.longvalue;
02776                 }
02777             else {
02778                 
02779                 if (!coercetostring (&vtemp))
02780                     return (false);
02781                 
02782                 if (!enlargehandle (vtemp.data.binaryvalue, 1, nil)) /*terminate string with nil char*/
02783                     return (false);
02784                 
02785                 ref = pcre_get_stringnumber(getpatternref (hcp), *vtemp.data.binaryvalue);
02786                 }
02787 
02788             if (ref < 0 || ref > getcapturecount (hcp)) {
02789                 regexpverberrorwithnumber (regexpnonexistantgrouperror, k+1, bserror);
02790                 return (false);
02791                 }
02792             
02793             (*hgroupsarray)[k] = ref;
02794             } /*for*/   
02795         }
02796     
02797     /* prepare result list */
02798     
02799     if (!opnewlist (&hresult, false))
02800         return (false);
02801     
02802     if (!setheapvalue ((Handle) hresult, listvaluetype, v))
02803         return (false);
02804     
02805     /* run engine */
02806     
02807     lastix = ix = 0;
02808     
02809     while (true) { /*execute loop at least once*/   
02810         
02811         res = regexpexechandle (hcp, hsubject, ix, longinfinity, hovec, bserror);
02812             
02813         /* catch internal errors */
02814         
02815         if (res <= 0 && res != PCRE_ERROR_NOMATCH) 
02816             return (false);
02817         
02818         /* if it did not match, we are done */
02819         
02820         if (res == PCRE_ERROR_NOMATCH)
02821             break;
02822 
02823         /* extract requested substrings */
02824         
02825         if (!regexpextractmatch (hsubject, hovec, hgroupsarray, ctgroups, hresult))
02826             return (false);
02827         
02828         /* determine position of next match attempt */
02829         
02830         lastix = ix = getgroupend (hovec, 0);
02831         
02832         if (getgrouplength (hovec, 0) == 0)
02833             ix++; /* prevent infinite loop if we matched the empty string */
02834         } /*while*/ 
02835     
02836     releaseheaptmp (hovec); /*no need to accumulate these on the tmp stack*/    
02837     
02838     releaseheaptmp ((Handle) hgroupsarray); /*no need to accumulate these on the tmp stack*/    
02839     
02840     return (true);
02841     } /*regexpextractverb*/
02842 
02843 
02844 static boolean regexpsplitverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
02845 
02846     /*
02847     split (patternRef, s, maxChunks = infinity)
02848         //list: unmatched fragments
02849     */
02850     Handle hcp;
02851     Handle hovec;
02852     Handle hsubject;
02853     long subjectlength;
02854     long maxchunks, ctchunks;
02855     tyvaluerecord vtemp;
02856     short ctconsumed = 2;
02857     short ctpositional = 2;
02858     hdllistrecord hresult;
02859     int res;
02860     int ix, lastix, k;
02861     
02862     /* get patternRef parameter */
02863     
02864     if (!regexpgetpatternvalue (hp1, 1, true, bserror, &hcp, &hovec))
02865         return (false);
02866         
02867     /* get s parameter */
02868     
02869     if (!getreadonlytextvalue (hp1, 2, &hsubject))
02870         return (false);
02871     
02872     subjectlength = gethandlesize (hsubject);
02873 
02874     /* get STR_maxSplits parameter */
02875 
02876     flnextparamislast = true;
02877     
02878     setlongvalue (longinfinity, &vtemp); /* defaults to 0x7fffffff */
02879 
02880     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_maxSplits, &vtemp)) 
02881         return (false);
02882     
02883     maxchunks = vtemp.data.longvalue;
02884     
02885     /* prepare result list */
02886     
02887     if (!opnewlist (&hresult, false))
02888         return (false);
02889     
02890     if (!setheapvalue ((Handle) hresult, listvaluetype, v))
02891         return (false);
02892 
02893     /* return early if the caller hasn't asked for at least one replacement */
02894     
02895     if (maxchunks <= 1) {
02896         
02897         initvalue (&vtemp, stringvaluetype);
02898         
02899         vtemp.data.stringvalue = hsubject;
02900         
02901         return (langpushlistval (hresult, nil, &vtemp));
02902         }
02903     
02904     /* run engine */
02905     
02906     lastix = ix = 0;
02907     
02908     ctchunks = 0;
02909     
02910     while (true) { /*execute loop at least once*/
02911         
02912         /* if we are about to reach maxchunks, add remainder and return */
02913         
02914         if (++ctchunks == maxchunks) {
02915             if (!pushhandlepartonlist (hresult, hsubject, lastix, subjectlength - lastix))
02916                 return (false);
02917             break;
02918             }
02919         
02920         /* attempt match */
02921         
02922         res = regexpexechandle (hcp, hsubject, ix, longinfinity, hovec, bserror);
02923             
02924         /* catch internal errors */
02925         
02926         if (res <= 0 && res != PCRE_ERROR_NOMATCH) 
02927             return (false);
02928         
02929         /* if it did not match, add remainder and return */
02930         
02931         if (res == PCRE_ERROR_NOMATCH) {
02932             if (!pushhandlepartonlist (hresult, hsubject, lastix, subjectlength - lastix))
02933                 return (false);
02934             break;
02935             }
02936         
02937         /* add fragment between last match and current match */
02938         
02939         if (!pushhandlepartonlist (hresult, hsubject, lastix, getgroupoffset (hovec, 0) - lastix))
02940             return (false);
02941         
02942         /* add any captured substrings of the delimiter */
02943         
02944         for (k = 1; k <= getcapturecount (hcp); k++) {
02945             
02946             if (!regexppushonematchonlist (hsubject, hovec, k, hresult))
02947                 return (false);
02948         
02949             } /*for*/
02950         
02951         /* determine position of next match attempt */
02952         
02953         lastix = ix = getgroupend (hovec, 0);
02954         
02955         if (getgrouplength (hovec, 0) == 0)
02956             ix++; /* prevent infinite loop if we matched the empty string */
02957         } /*while*/ 
02958     
02959     releaseheaptmp (hovec); /*no need to accumulate these on the tmp stack*/    
02960     
02961     return (true);
02962     } /*regexpsplitverb*/
02963 
02964 
02965 static boolean regexpjoinverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
02966 #pragma unused (bserror)
02967 
02968     /*
02969     join (s, strList)
02970         //string: elements of strList glued together with s
02971     */
02972 
02973     Handle hglue;
02974     tyvaluerecord vlist;
02975     handlestream hs;
02976     Handle hresult;
02977     tywritelistinfo info;
02978     
02979     clearbytes (&hs, sizeof (hs));
02980         
02981     /* get s parameter */
02982     
02983     if (!getreadonlytextvalue (hp1, 1, &hglue))
02984         return (false);
02985     
02986     /* get strList parameter */
02987     
02988     if (!getreadonlyparamvalue (hp1, 2, &vlist))
02989         return (false);
02990     
02991     if (vlist.valuetype != listvaluetype) {
02992     
02993         if (!vlist.fltmpstack && !copyvaluerecord (vlist, &vlist))
02994             return (false);
02995         
02996         if (!coercetolist (&vlist, listvaluetype))
02997             return (false);
02998         }
02999     
03000     /* prepare result */
03001 
03002     openhandlestream (nil, &hs);
03003     
03004     /* run engine */
03005     
03006     info.s = &hs;
03007     info.hglue = hglue;
03008     
03009     if (!langvisitlistvalues (&vlist, &regexpwritelistitemscallback, (ptrvoid) &info))
03010         goto exit;
03011     
03012     /* set result, shrink handlestream to remove final copy of hglue */
03013     
03014     hresult = closehandlestream (&hs);
03015     
03016     if (hresult != nil) {
03017         if (!sethandlesize (hresult, gethandlesize (hresult) - gethandlesize (hglue)))
03018             goto exit;
03019         }
03020     else {
03021         if (!newemptyhandle (&hresult))
03022             goto exit;
03023         }
03024 
03025     return (setheapvalue (hresult, stringvaluetype, v));
03026 
03027 exit:
03028 
03029     disposehandlestream (&hs);
03030     
03031     return (false);
03032     } /*regexpjoinverb*/
03033 
03034 
03035 static boolean regexpvisitverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
03036 
03037     /*
03038     visit (patternRef, s, adrCallBack, flMakeGroups = false, maxRuns = infinity)
03039         //boolean: false if callback returned false
03040     */
03041 
03042     Handle hcp;
03043     Handle hovec;
03044     Handle hsubject;
03045     tyaddress adrcallback;
03046     boolean flmakegroups, flmakenamedgroups;
03047     long maxruns;
03048     short ctconsumed = 3;
03049     short ctpositional = 3;
03050     tyvaluerecord vtemp;
03051     int ix, lastix, res;
03052     
03053     /* get patternRef parameter */
03054     
03055     if (!regexpgetpatternvalue (hp1, 1, false, bserror, &hcp, &hovec))
03056         return (false);
03057         
03058     /* get s parameter */
03059     
03060     if (!gettextvalue (hp1, 2, &hsubject)) /*better get a copy, who knows what the callback script might do to the original*/
03061         return (false);
03062 
03063     /* get adrCallback parameter */
03064     
03065     if (!getaddressparam (hp1, 3, &vtemp))
03066         return (false);
03067     
03068     if (!getaddressvalue (vtemp, &adrcallback.ht, adrcallback.bs))
03069         return (false);
03070         
03071     /* get flMakeGroups parameter */
03072     
03073     setbooleanvalue (false, &vtemp); /* defaults to false */
03074 
03075     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flMakeGroups, &vtemp)) 
03076         return (false);
03077     
03078     flmakegroups = vtemp.data.flvalue;
03079         
03080     /* get flmakenamedgroups parameter */
03081     
03082     setbooleanvalue (false, &vtemp); /* defaults to false */
03083 
03084     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flMakeNamedGroups, &vtemp)) 
03085         return (false);
03086     
03087     flmakenamedgroups = vtemp.data.flvalue;
03088 
03089     /* get maxRuns parameter */
03090 
03091     flnextparamislast = true;
03092     
03093     setlongvalue (longinfinity, &vtemp); /* defaults to 0x7fffffff */
03094 
03095     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_maxRuns, &vtemp)) 
03096         return (false);
03097     
03098     maxruns = vtemp.data.longvalue;
03099     
03100     /* run engine */
03101     
03102     lastix = ix = 0;
03103     
03104     setbooleanvalue (true, v);
03105     
03106     while (true) { /*execute loop at least once*/   
03107         
03108         res = regexpexechandle (hcp, hsubject, ix, longinfinity, hovec, bserror);
03109             
03110         /* catch internal errors */
03111         
03112         if (res <= 0 && res != PCRE_ERROR_NOMATCH)
03113             return (false);
03114         
03115         /* if it did not match, we are done */
03116         
03117         if (res == PCRE_ERROR_NOMATCH)
03118             break;
03119 
03120         /* run callback script, break if it returns false */
03121 
03122         if (!regexprunvisitcallback (hsubject, hcp, hovec, flmakegroups, flmakenamedgroups, &adrcallback, bserror, v))
03123             return (false);
03124         
03125         if (!coercetoboolean (v))
03126             return (false);
03127         
03128         if (!(*v).data.flvalue)
03129             break;
03130         
03131         /* if we reached max runs, copy remainder and we are done */
03132         
03133         if (--maxruns <= 0)
03134             break;
03135         
03136         /* determine position of next match attempt */
03137         
03138         lastix = ix = getgroupend (hovec, 0);
03139         
03140         if (getgrouplength (hovec, 0) == 0)
03141             ix++; /* prevent infinite loop if we matched the empty string */
03142         } /*while*/ 
03143     
03144     releaseheaptmp (hovec); /*no need to accumulate these on the tmp stack*/    
03145 
03146     return (true);
03147     } /*regexpvisitverb*/
03148 
03149 
03150 static boolean regexpgrepverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
03151 
03152     /*
03153     grep (patternRef, s, flIncludeMatches = true)
03154         //list: filtered copy of s
03155     */
03156     
03157     Handle hcp;
03158     Handle hovec;
03159     tyvaluerecord vsubject;
03160     Handle hsubject;
03161     long subjectlength;
03162     boolean flincludematches;   
03163     short ctconsumed = 2;
03164     short ctpositional = 2;
03165     tyvaluerecord vtemp;
03166     hdllistrecord hresult;
03167     char *p, *pstart, *pend;
03168     int res, ix, len;
03169     pcre_extra extra;
03170     boolean flsuccess = false;
03171     
03172     /* get patternRef parameter */
03173     
03174     if (!regexpgetpatternvalue (hp1, 1, true, bserror, &hcp, &hovec))
03175         return (false);
03176 
03177     /* get s parameter (list or string) */
03178     
03179     if (!getreadonlyparamvalue (hp1, 2, &vsubject))
03180         return (false);
03181     
03182     if (vsubject.valuetype != listvaluetype) {
03183         if (!coercetostring (&vsubject))
03184             return (false);
03185         }
03186     
03187     /* get flIncludeMatches parameter */
03188 
03189     flnextparamislast = true;
03190     
03191     setbooleanvalue (true, &vtemp); /* defaults to true */
03192 
03193     if (!getoptionalparamvalue (hp1, &ctconsumed, &ctpositional, STR_flIncludeMatches, &vtemp)) 
03194         return (false);
03195     
03196     flincludematches = vtemp.data.flvalue;
03197     
03198     /* prepare result list */
03199     
03200     if (!opnewlist (&hresult, false))
03201         return (false);
03202     
03203     if (!setheapvalue ((Handle) hresult, listvaluetype, v))
03204         return (false);
03205     
03206     /* handle subject as list */
03207     
03208     if (vsubject.valuetype == listvaluetype) {
03209         
03210         tygreplistinfo info;
03211         
03212         info.hcp = hcp;
03213         info.hovec = hovec;
03214         info.hresult = hresult;
03215         info.flincludematches = flincludematches;
03216         info.bserrorptr = (ptrstring) &bserror;
03217         
03218         flsuccess = langvisitlistvalues (&vsubject, &regexpgreplistitemscallback, (ptrvoid) &info);
03219     
03220         releaseheaptmp (hovec); /*no need to accumulate these on the tmp stack*/
03221         
03222         return (flsuccess);
03223         }
03224     
03225     /* handle subject as string */
03226     
03227     hsubject = vsubject.data.stringvalue;
03228     
03229     subjectlength = gethandlesize (hsubject);
03230 
03231     lockhandle (hsubject); /*pointers into hsubject need to survive pushhandlepartonlist call*/
03232     lockhandle (hcp);
03233     lockhandle (hovec);
03234     
03235     initpcreextra (hcp, &extra);
03236     
03237     p = *hsubject;
03238     
03239     pend = *hsubject + subjectlength;
03240 
03241     while (true) {
03242         
03243         /* get next line */
03244         
03245         pstart = p;
03246         
03247         while (p < pend) {
03248             
03249             if (*p == '\r')
03250                 break;
03251             
03252             p++;
03253             } /*while*/
03254         
03255         /* try to match line */
03256         
03257         ix = pstart - *hsubject;
03258         
03259         len = p - pstart;
03260 
03261         res = regexpexec (hcp, pstart, len, 0, len, hovec, bserror);
03262             
03263         /* catch internal errors */
03264         
03265         if (res <= 0 && res != PCRE_ERROR_NOMATCH) {
03266             regexpverberrorwithnumber (regexpinternalerror, res, bserror);
03267             break;
03268             }
03269     
03270         /* possibly add line to result list */
03271 
03272         if (flincludematches ? (res > 0) : (res == PCRE_ERROR_NOMATCH))
03273             if (!pushhandlepartonlist (hresult, hsubject, ix, len)) /* function call might move memory */
03274                 break;
03275         
03276         /* advance and skip newline char */
03277         
03278         p++;
03279         
03280         if (*p == '\n')
03281             p++;
03282         
03283         /* check wether we are done */
03284         
03285         if (p >= pend) {
03286             flsuccess = true;
03287             break;
03288             }
03289         
03290         } /*while*/
03291     
03292     unlockhandle (hovec);
03293     unlockhandle (hcp);
03294     unlockhandle (hsubject);
03295     
03296     releaseheaptmp (hovec); /*no need to accumulate these on the tmp stack*/    
03297     
03298     return (flsuccess);
03299     } /*regexpgrepverb*/
03300 
03301 
03302 static boolean regexpgetpatterninfoverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
03303 
03304     /*
03305     on getPatternInfo (patternRef, adrInfoTable)
03306         //boolean: true
03307     */
03308     
03309     Handle hcp;
03310     hdlhashtable htable, htinfo, htfirstbyte, htnames, htoptions;
03311     bigstring bsname, bsnum;
03312     unsigned char *ucptr;
03313     char *cptr;
03314     int x, k, ct, sz;
03315     
03316     /* get patternRef parameter */
03317     
03318     if (!regexpgetpatternvalue (hp1, 1, true, bserror, &hcp, nil))
03319         return (false);
03320     
03321     /* get adrInfoTable parameter */
03322 
03323     if (!getvarparam (hp1, 2, &htable, bsname))
03324         return (false);
03325 
03326     if (!langsuretablevalue (htable, bsname, &htinfo))
03327         return (false);
03328 
03329     /* highest back reference */
03330     
03331     if (!regexpgetpatterninfo (hcp, PCRE_INFO_BACKREFMAX, bserror, (void *) &x))
03332         return (false);
03333     
03334     if (!langassignlongvalue (htinfo, STR_backRefMax, x))
03335         return (false);
03336 
03337     /* capture count */
03338     
03339     if (!regexpgetpatterninfo (hcp, PCRE_INFO_CAPTURECOUNT, bserror, (void *) &x))
03340         return (false);
03341     
03342     if (!langassignlongvalue (htinfo, STR_captureCount, x))
03343         return (false);
03344 
03345     /* first byte */
03346     
03347     if (!regexpgetpatterninfo (hcp, PCRE_INFO_FIRSTBYTE, bserror, (void *) &x))
03348         return (false);
03349     
03350     if (x < 0) {
03351         if (!langassignlongvalue (htinfo, STR_firstByte, x))
03352             return (false);
03353         }
03354     else {
03355         if (!langassigncharvalue (htinfo, STR_firstByte, (unsigned char) x))
03356             return (false);
03357         }
03358 
03359     /* first byte table*/
03360     
03361     if (!regexpgetpatterninfo (hcp, PCRE_INFO_FIRSTTABLE, bserror, (void *) &ucptr))
03362         return (false);
03363     
03364     if (!langassignnewtablevalue (htinfo, STR_firstByteTable, &htfirstbyte))
03365         return (false);
03366     
03367     if (ucptr != nil) {
03368     
03369         for (k = 0; k < 256; k++) {
03370         
03371             numbertostring (k, bsnum);
03372             
03373             padwithzeros (bsnum, 3);
03374             
03375             if (!langassignbooleanvalue (htfirstbyte, bsnum, ucptr[k/8] & (1 << (k % 8))))
03376                 return (false);
03377             } /*for*/
03378         }
03379 
03380     /* last literal */
03381     
03382     if (!regexpgetpatterninfo (hcp, PCRE_INFO_LASTLITERAL, bserror, (void *) &x))
03383         return (false);
03384     
03385     if (x < 0) {
03386         if (!langassignlongvalue (htinfo, STR_lastLiteral, x))
03387             return (false);
03388         }
03389     else {
03390         if (!langassigncharvalue (htinfo, STR_lastLiteral, (unsigned char) x))
03391             return (false);
03392         }
03393 
03394     /* name table */
03395     
03396     if (!regexpgetpatterninfo (hcp, PCRE_INFO_NAMECOUNT, bserror, (void *) &ct))
03397         return (false);
03398     
03399     if (!regexpgetpatterninfo (hcp, PCRE_INFO_NAMEENTRYSIZE, bserror, (void *) &sz))
03400         return (false);
03401     
03402     if (!regexpgetpatterninfo (hcp, PCRE_INFO_NAMETABLE, bserror, (void *) &cptr))
03403         return (false);
03404 
03405     if (!langassignnewtablevalue (htinfo, STR_nameTable, &htnames))
03406         return (false);
03407 
03408     if (cptr != nil) {
03409         
03410         bigstring bs;
03411         int ix;
03412         
03413         for (k = 0; k < ct; k++) {
03414             
03415             ix = (cptr[k*sz] << 8) + cptr[k*sz+1];
03416             
03417             copyctopstring (&(cptr[k*sz+2]), bs);
03418 
03419             if (!langassignlongvalue (htnames, bs, ix))
03420                 return (false);
03421         
03422             } /*for*/
03423         }
03424 
03425     /* options */
03426     
03427     if (!regexpgetpatterninfo (hcp, PCRE_INFO_OPTIONS, bserror, (void *) &x))
03428         return (false);
03429     
03430     if (!langassignnewtablevalue (htinfo, STR_options, &htoptions))
03431         return (false);
03432 
03433     if (!langassignbooleanvalue (htoptions, STR_flCaseSensitive, (x & PCRE_CASELESS) == 0))
03434         return (false);
03435 
03436     if (!langassignbooleanvalue (htoptions, STR_flDotMatchesAll, (x & PCRE_DOTALL) != 0))
03437         return (false);
03438 
03439     if (!langassignbooleanvalue (htoptions, STR_flMultiLine, (x & PCRE_MULTILINE) != 0))
03440         return (false);
03441 
03442     if (!langassignbooleanvalue (htoptions, STR_flAutoCapture, (x & PCRE_NO_AUTO_CAPTURE) == 0))
03443         return (false);
03444 
03445     if (!langassignbooleanvalue (htoptions, STR_flGreedyQuantifiers, (x & PCRE_UNGREEDY) == 0))
03446         return (false);
03447 
03448     if (!langassignbooleanvalue (htoptions, STR_flMatchEmptyString, (x & PCRE_NOTEMPTY) == 0))
03449         return (false);
03450 
03451     if (!langassignbooleanvalue (htoptions, STR_flExtendedMode, (x & PCRE_EXTENDED) == 0))
03452         return (false);
03453 
03454     /* size */
03455     
03456     if (!regexpgetpatterninfo (hcp, PCRE_INFO_SIZE, bserror, (void *) &x))
03457         return (false);
03458     
03459     if (!langassignlongvalue (htinfo, STR_size, x))
03460         return (false);
03461 
03462     /* study size */
03463     
03464     if (!regexpgetpatterninfo (hcp, PCRE_INFO_STUDYSIZE, bserror, (void *) &x))
03465         return (false);
03466     
03467     if (!langassignlongvalue (htinfo, STR_studySize, x))
03468         return (false);
03469 
03470     /* clean up and return */
03471     
03472     setbooleanvalue (true, v);
03473             
03474     return (true);
03475     } /*regexpgetpatterninfoverb*/
03476 
03477 
03478 static boolean regexpexpandverb (hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
03479     
03480     /*
03481     on expand (s, adrMatchInfoTable)
03482         //string: processed copy of s
03483     */
03484 
03485     Handle hsubject;
03486     long subjectlength;
03487     tyvaluerecord vtemp;
03488     hdlhashtable htmatchinfo;
03489     hdlhashnode hnode;
03490     tyreplscanexpandinfo info;
03491     Handle hresult;
03492     boolean fl;
03493         
03494     /* get s parameter */
03495     
03496     if (!getreadonlytextvalue (hp1, 1, &hsubject))
03497         return (false);
03498     
03499     subjectlength = gethandlesize (hsubject);
03500     
03501     /* get adrMatchInfoTable parameter */
03502     
03503     if (!gettablevalue (hp1, 2, &htmatchinfo))
03504         return (false);
03505 
03506     /* get groupStrings list from info table*/
03507         
03508     if (!langhashtablelookup (htmatchinfo, STR_groupStrings, &vtemp, &hnode))
03509         return (false);
03510     
03511     if (vtemp.valuetype != listvaluetype) {
03512     
03513         if (!copyvaluerecord (vtemp, &vtemp) || !coercetolist (&vtemp, listvaluetype)) {
03514             return (false);
03515             }
03516         }
03517             
03518     info.hgroups = (hdllistrecord) vtemp.data.binaryvalue;
03519 
03520     /* get namedGroups sub-table from info table*/
03521 
03522     if (hashtablelookup (htmatchinfo, STR_namedGroups, &vtemp, &hnode)) {
03523         if (!langexternalvaltotable (vtemp, &info.htnames, hnode)) {
03524             return (false);
03525             }
03526         }
03527     
03528     /* perform replacement */
03529     
03530     info.hsubject = hsubject;
03531 
03532     openhandlestream (nil, &info.s);
03533     
03534     lockhandle (hsubject);
03535 
03536     fl = regexpscanreplacement (*hsubject, subjectlength, bserror,
03537                                     &replscanwriteliteral,
03538                                     &replscanwritenumbered,
03539                                     &replscanwritenamed,
03540                                     &replscanerror,
03541                                     (void *) &info);
03542     
03543     unlockhandle (hsubject);
03544     
03545     if (!fl) {
03546         disposehandlestream (&info.s);
03547         return (false);
03548         }
03549         
03550     hresult = closehandlestream (&info.s);
03551     
03552     if (hresult == nil) {
03553         if (!newemptyhandle (&hresult))
03554             return (false);
03555         }
03556         
03557     return (setheapvalue (hresult, stringvaluetype, v));
03558     } /*regexpexpandverb*/
03559 
03560 
03561 static boolean regexpfunctionvalue (short token, hdltreenode hp1, tyvaluerecord *v, bigstring bserror) {
03562 
03563     /*
03564     2003-04-22 AR: kernel verbs for regular expressions
03565     */
03566     
03567     setbooleanvalue (false, v); /*by default, regexp functions return false*/
03568     
03569     switch (token) {
03570 
03571         case compilefunc:
03572             return (regexpcompileverb (hp1, v, bserror));
03573 
03574         case matchfunc:
03575             return (regexpmatchverb (hp1, v, bserror));
03576 
03577         case replacefunc:
03578             return (regexpreplaceverb (hp1, v, bserror));
03579 
03580         case extractfunc:
03581             return (regexpextractverb (hp1, v, bserror));
03582 
03583         case splitfunc:
03584             return (regexpsplitverb (hp1, v, bserror));
03585 
03586         case joinfunc:
03587             return (regexpjoinverb (hp1, v, bserror));
03588 
03589         case visitfunc:
03590             return (regexpvisitverb (hp1, v, bserror));
03591 
03592         case grepfunc:
03593             return (regexpgrepverb (hp1, v, bserror));
03594         
03595         case getpatterninfofunc:
03596             return (regexpgetpatterninfoverb (hp1, v, bserror));
03597         
03598         case expandfunc:
03599             return (regexpexpandverb (hp1, v, bserror));
03600 
03601         default:
03602             getstringlist (langerrorlist, unimplementedverberror, bserror);
03603             return (false);
03604         } /*switch*/
03605     } /*regexpfunctionvalue*/
03606 
03607 
03608 boolean regexpinitverbs (void) {
03609 
03610     /*
03611     2003-04-22 AR: new verbs. Call me from shellinit in shell.c.
03612     */
03613     
03614     assert (sizeof (regexp_default_tables) == tables_length); /*if this fails, something changed in the PCRE library*/
03615     
03616     chartableptr = regexp_default_tables; //was: pcre_maketables (); /*build char tables using current locale*/
03617     
03618     return (loadfunctionprocessor (idregexpverbs, &regexpfunctionvalue));
03619     } /*regexpinitverbs*/
03620 
03621 
03622 #endif /* flregexpverbs */
03623 

Generated on Wed May 31 18:19:52 2006 for frontierkernel 10.1.10a by  doxygen 1.4.6