summaryrefslogtreecommitdiff
path: root/Utilities/PCRE/pcre_subst
diff options
context:
space:
mode:
Diffstat (limited to 'Utilities/PCRE/pcre_subst')
-rw-r--r--Utilities/PCRE/pcre_subst/pcre_subst.373
-rw-r--r--Utilities/PCRE/pcre_subst/pcre_subst.c191
-rw-r--r--Utilities/PCRE/pcre_subst/pcre_subst.h35
3 files changed, 299 insertions, 0 deletions
diff --git a/Utilities/PCRE/pcre_subst/pcre_subst.3 b/Utilities/PCRE/pcre_subst/pcre_subst.3
new file mode 100644
index 0000000..b3c269b
--- /dev/null
+++ b/Utilities/PCRE/pcre_subst/pcre_subst.3
@@ -0,0 +1,73 @@
+.TH PCRE 3
+.SH NAME
+pcre_subst - Perl-compatible regular expression substitution.
+.SH SYNOPSIS
+.B #include <pcre.h>
+.br
+.B #include <pcre_subst.h>
+.PP
+.SM
+.br
+char *pcre_subst(const pcre *\fIcode\fR, const pcre_extra *\fIextra\fR,
+.ti +5n
+const char *\fIsubject\fR, int \fIlength\fR, int \fIstartoffset\fR,
+.ti +5n
+int \fIoptions\fR, const char *\fIreplacement\fR);
+
+
+
+.SH DESCRIPTION
+\fBpcre_subst\fR is a convenience routine that calls \fIpcre_exec\fR,
+and returns a freshly allocated string based on the \fIsubject\fR with
+the \fIreplacement\fR action applied. Unlike \fIsubject\fR, which is
+passed as a byte array with a length, \fIreplacement\fR is expected to
+be a zero terminated string (most users will just pass \fIstrlen(subject)\fR
+as the \fIlength\fR).
+
+.br
+If no match is found, pcre_subst returns NULL. The returned string is zero
+terminated (note that \fIsubject\fR doesn't have to be). For information
+on the \fIcode\fR, \fIextra\fR, \fIsubject\fR, \fIlength\fR,
+\fIstartoffset\fR and \fIoptions\fR parameters, please see \fBpcre(3)\fR.
+
+.SH REPLACEMENT STRING
+The replacement string supports only a small subset of the Perl replacement
+syntax: $1 style references to captured substrings are expanded, and every
+other character is copied literally. In particular, \\1 style escapes are
+not supported.
+
+.SH EXAMPLE
+.Bd -literal -compact
+#include <stdio.h>
+#include <pcre.h>
+#include "pcre_subst.h"
+
+int
+main()
+{
+ char *pat = "quick\\\\s(\\\\w+)\\\\s(fox)";
+ char *rep = "$1ish $2";
+ char *str = "The quick brown foxy";
+ char *newstr;
+ const char *err;
+ int erroff;
+ pcre_extra *extra;
+ pcre *p = pcre_compile(pat, 0, &err, &erroff, NULL);
+ if (p == NULL) {
+ fprintf(stderr, "%s at %d\\n", err, erroff);
+ exit(1);
+ }
+ extra = pcre_study(p, 0, &err);
+ if (err != NULL)
+ fprintf(stderr, "Study %s: %s\\n", pat, err);
+ newstr = pcre_subst(p, extra, str, strlen(str),
+ 0, 0, rep);
+ if (newstr) {
+ printf("New string: %s\\n", newstr);
+ pcre_free(newstr);
+ };
+ return 0;
+}
+.Ed
+
+.SH SEE ALSO
+pcre(3)
diff --git a/Utilities/PCRE/pcre_subst/pcre_subst.c b/Utilities/PCRE/pcre_subst/pcre_subst.c
new file mode 100644
index 0000000..3f34f97
--- /dev/null
+++ b/Utilities/PCRE/pcre_subst/pcre_subst.c
@@ -0,0 +1,191 @@
+/*************************************************
+* PCRE string replacement *
+*************************************************/
+
+/*
+PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+pcre_subst is a wrapper around pcre_exec designed to make it easier to
+perform PERL style replacements with PCRE.
+
+Written by: Bert Driehuis <driehuis@playbeing.org>
+
+ Copyright (c) 2000 Bert Driehuis
+
+-----------------------------------------------------------------------------
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+4. If PCRE is embedded in any software that is released under the GNU
+ General Purpose Licence (GPL), then the terms of that licence shall
+ supersede any condition above with which it is incompatible.
+*/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pcre.h>
+#include "pcre_subst.h"
+
+/*
+ * Capacity limit for capture handling: sizes the replen/repstr arrays in
+ * edit() and, multiplied by 3, the offset vector that pcre_subst() hands
+ * to pcre_exec().
+ */
+#define MAXCAPTURE 50
+
+#ifdef DEBUG_PCRE_SUBST
+/*
+ * Debug helper: print one line as wide as the subject, showing the bytes in
+ * the half-open range [start, end) and a '-' for every other position.
+ *
+ *   str    subject buffer (need not be NUL-terminated)
+ *   len    number of valid bytes in str
+ *   start  offset of the first byte to reveal
+ *   end    offset one past the last byte to reveal
+ */
+static void
+dumpstr(const char *str, int len, int start, int end)
+{
+    int i;
+
+    /* Bound the walk by len, not strlen(): the subject is a counted buffer. */
+    for (i = 0; i < len; i++)
+        putchar((i >= start && i < end) ? str[i] : '-');
+    putchar('\n');
+}
+
+/*
+ * Debug helper: print the subject, the raw pcre_exec() result and one
+ * dumpstr() line per reported offset pair.
+ *
+ *   str   subject buffer (need not be NUL-terminated)
+ *   len   number of valid bytes in str
+ *   rep   replacement string (accepted for symmetry with edit(); unused)
+ *   nmat  return value of pcre_exec(); when <= 0 no offsets are printed
+ *   ovec  offset vector filled in by pcre_exec()
+ */
+static void
+dumpmatch(const char *str, int len, const char *rep, int nmat, const int *ovec)
+{
+    int i;
+
+    (void)rep;
+    /* %.*s keeps the print inside the counted subject buffer. */
+    printf("%.*s Input\n", len, str);
+    printf("nmat=%d", nmat);
+    for (i = 0; i < nmat * 2; i++)
+        printf(" %d", ovec[i]);
+    printf("\n");
+    for (i = 0; i < nmat * 2; i += 2)
+        dumpstr(str, len, ovec[i], ovec[i + 1]);
+    printf("\n");
+}
+#endif
+
+/*
+ * Try to parse a "$N" capture reference at *pp.
+ *
+ * When *pp points at '$' followed by a decimal digit, store the number in
+ * *group, advance *pp past the digits and return 1. Otherwise leave *pp
+ * untouched and return 0.
+ */
+static int
+parse_capture_ref(const char **pp, unsigned long *group)
+{
+    const char *p = *pp;
+    char *end;
+
+    /* <ctype.h> functions need a value representable as unsigned char. */
+    if (p[0] != '$' || !isdigit((unsigned char)p[1]))
+        return 0;
+    *group = strtoul(p + 1, &end, 10);
+    *pp = end;
+    return 1;
+}
+
+/*
+ * Return non-zero when 1-based capture number `group` refers to one of the
+ * `nmat` captures that were recorded (valid numbers are 1..nmat).
+ */
+static int
+capture_in_range(unsigned long group, int nmat)
+{
+    return nmat > 0 && group >= 1 && group <= (unsigned long)nmat;
+}
+
+/*
+ * Compute the number of bytes the expanded replacement will occupy.
+ *
+ *   rep     NUL-terminated replacement template; "$N" inserts capture N
+ *   nmat    number of captures available (capture numbers 1..nmat)
+ *   replen  replen[i] is the byte length of capture i + 1
+ *
+ * References outside 1..nmat contribute nothing and are reported on stderr.
+ * Must stay in step with doreplace(), which writes exactly this many bytes.
+ */
+static int
+findreplen(const char *rep, int nmat, const int *replen)
+{
+    int len = 0;
+    unsigned long group;
+    const char *cp = rep;
+
+    while (*cp) {
+        if (parse_capture_ref(&cp, &group)) {
+            if (capture_in_range(group, nmat))
+                len += replen[group - 1];
+            else
+                fprintf(stderr, "repl %lu out of range\n", group);
+        } else {
+            cp++;
+            len++;
+        }
+    }
+    return len;
+}
+
+/*
+ * Expand the replacement template into `out`.
+ *
+ *   out     destination; must have room for findreplen(rep, nmat, replen)
+ *           bytes. No terminating NUL is written.
+ *   rep     NUL-terminated replacement template
+ *   nmat    number of captures available (capture numbers 1..nmat)
+ *   replen  replen[i] is the byte length of capture i + 1
+ *   repstr  repstr[i] points at the first byte of capture i + 1
+ *
+ * References outside 1..nmat are silently dropped, matching findreplen().
+ */
+static void
+doreplace(char *out, const char *rep, int nmat, int *replen, const char **repstr)
+{
+    unsigned long group;
+    const char *cp = rep;
+
+    while (*cp) {
+        if (parse_capture_ref(&cp, &group)) {
+            if (capture_in_range(group, nmat) && replen[group - 1] > 0) {
+                /* memcpy, not strncpy: captures are counted byte ranges. */
+                memcpy(out, repstr[group - 1], (size_t)replen[group - 1]);
+                out += replen[group - 1];
+            }
+        } else {
+            *out++ = *cp++;
+        }
+    }
+}
+
+/*
+ * Build the substituted string for one successful match.
+ *
+ *   str   subject buffer (counted; need not be NUL-terminated)
+ *   len   number of valid bytes in str
+ *   rep   NUL-terminated replacement template ("$N" inserts capture N)
+ *   nmat  number of offset pairs in ovec, including pair 0 (the whole match)
+ *   ovec  offset vector filled in by pcre_exec()
+ *
+ * Returns a buffer obtained from pcre_malloc() holding the text before the
+ * match, the expanded replacement and the text after the match, followed by
+ * a terminating NUL. Returns NULL if the allocation fails.
+ */
+static char *
+edit(const char *str, int len, const char *rep, int nmat, const int *ovec)
+{
+    const int *mvec = ovec;         /* offsets of the whole match */
+    const int *gvec = ovec + 2;     /* offsets of capture 1 onwards */
+    int ngroups = nmat - 1;
+    int prefix = mvec[0];           /* bytes before the match */
+    int suffix = len - mvec[1];     /* bytes after the match */
+    int rlen, newlen, i;
+    char *res, *cp;
+    int replen[MAXCAPTURE];
+    const char *repstr[MAXCAPTURE];
+
+    if (ngroups > MAXCAPTURE)
+        ngroups = MAXCAPTURE;
+
+    /* Start with every slot empty so no entry is ever read uninitialised. */
+    for (i = 0; i < MAXCAPTURE; i++) {
+        replen[i] = 0;
+        repstr[i] = str;
+    }
+    for (i = 0; i < ngroups; i++) {
+        int from = gvec[i * 2];
+        int to = gvec[i * 2 + 1];
+
+        /* A negative offset marks a group that did not take part. */
+        if (from >= 0 && to >= from) {
+            replen[i] = to - from;
+            repstr[i] = &str[from];
+        }
+#ifdef DEBUG_PCRE_SUBST
+        printf(">>>%d %d %.*s\n", i, replen[i], replen[i], repstr[i]);
+#endif
+    }
+
+    rlen = findreplen(rep, ngroups, replen);
+    newlen = prefix + rlen + suffix;
+#ifdef DEBUG_PCRE_SUBST
+    printf("resulting length %d (srclen=%d)\n", newlen, len);
+#endif
+
+    res = pcre_malloc(newlen + 1);
+    if (res == NULL)
+        return NULL;
+
+    cp = res;
+    if (prefix > 0) {
+        memcpy(cp, str, (size_t)prefix);
+        cp += prefix;
+    }
+    doreplace(cp, rep, ngroups, replen, repstr);
+    cp += rlen;
+    /* Copy the tail by count: the subject may lack a terminating NUL. */
+    if (suffix > 0)
+        memcpy(cp, &str[mvec[1]], (size_t)suffix);
+    res[newlen] = 0;
+    return res;
+}
+
+/*
+ * Match a compiled pattern against a subject and return a copy of the
+ * subject with the first match replaced.
+ *
+ *   ppat     compiled pattern, passed through to pcre_exec()
+ *   extra    study data, passed through to pcre_exec()
+ *   str      subject buffer (need not be NUL-terminated)
+ *   len      number of bytes in str
+ *   offset   start offset, passed through to pcre_exec()
+ *   options  option bits, passed through to pcre_exec()
+ *   rep      NUL-terminated replacement template ("$N" inserts capture N)
+ *
+ * Returns a NUL-terminated string allocated with pcre_malloc(), or NULL when
+ * pcre_exec() reports no match or an error, or when allocation fails.
+ */
+char *
+pcre_subst(const pcre *ppat, const pcre_extra *extra, const char *str, int len,
+    int offset, int options, const char *rep)
+{
+    int nmat;
+    int ovec[MAXCAPTURE * 3];
+    /* pcre_exec() wants the element count of the vector, not its byte size. */
+    const int ovecsize = (int)(sizeof(ovec) / sizeof(ovec[0]));
+
+    nmat = pcre_exec(ppat, extra, str, len, offset, options, ovec, ovecsize);
+#ifdef DEBUG_PCRE_SUBST
+    dumpmatch(str, len, rep, nmat, ovec);
+#endif
+    /*
+     * Per the PCRE API documentation a return of 0 means the pattern matched
+     * but the vector was too small for every capture; the first ovecsize / 3
+     * pairs are still valid, so substitute using those.
+     */
+    if (nmat == 0)
+        nmat = ovecsize / 3;
+    if (nmat < 0)
+        return NULL;
+    return edit(str, len, rep, nmat, ovec);
+}
+
+#ifdef DEBUG_BUILD
+/*
+ * Demo driver (DEBUG_BUILD only): compile and study a fixed pattern, run one
+ * substitution on a fixed subject and print either the new string or
+ * "No match".
+ */
+int
+main()
+{
+    char *pattern = "quick\\s(\\w+)\\s(fox)";
+    char *replacement = "$1ish $2";
+    char *subject = "The quick brown foxy";
+    const char *errmsg;
+    int errpos;
+    pcre *compiled;
+    pcre_extra *study;
+    char *result;
+
+    compiled = pcre_compile(pattern, 0, &errmsg, &errpos, NULL);
+    if (!compiled) {
+        fprintf(stderr, "%s at %d\n", errmsg, errpos);
+        exit(1);
+    }
+
+    /* A study failure is reported but does not stop the demo. */
+    study = pcre_study(compiled, 0, &errmsg);
+    if (errmsg)
+        fprintf(stderr, "Study %s failed: %s\n", pattern, errmsg);
+
+    result = pcre_subst(compiled, study, subject, strlen(subject), 0, 0,
+        replacement);
+    if (!result) {
+        printf("No match\n");
+        return 0;
+    }
+    printf("Newstr\t%s\n", result);
+    pcre_free(result);
+    return 0;
+}
+#endif
diff --git a/Utilities/PCRE/pcre_subst/pcre_subst.h b/Utilities/PCRE/pcre_subst/pcre_subst.h
new file mode 100644
index 0000000..e4f4c44
--- /dev/null
+++ b/Utilities/PCRE/pcre_subst/pcre_subst.h
@@ -0,0 +1,35 @@
+/*************************************************
+* PCRE string replacement *
+*************************************************/
+
+/*
+PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+pcre_subst is a wrapper around pcre_exec designed to make it easier to
+perform PERL style replacements with PCRE.
+
+Written by: Bert Driehuis <driehuis@playbeing.org>
+
+ Copyright (c) 2000 Bert Driehuis
+
+-----------------------------------------------------------------------------
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+4. If PCRE is embedded in any software that is released under the GNU
+ General Purpose Licence (GPL), then the terms of that licence shall
+ supersede any condition above with which it is incompatible.
+*/
+
+#ifndef PCRE_SUBST_H
+#define PCRE_SUBST_H
+
+/*
+ * Run a compiled pattern against a subject and return a newly allocated,
+ * NUL-terminated copy of the subject with the match replaced, or NULL if
+ * there was no match. <pcre.h> must be included before this header.
+ *
+ *   ppat     compiled pattern
+ *   extra    study data for the pattern
+ *   str      subject bytes (need not be NUL-terminated)
+ *   len      number of bytes in str
+ *   offset   start offset within str
+ *   options  pcre_exec() option bits
+ *   rep      NUL-terminated replacement template; "$1"-style references
+ *            insert captured substrings
+ *
+ * Release the result with pcre_free().
+ */
+char *pcre_subst(const pcre *ppat, const pcre_extra *extra, const char *str,
+    int len, int offset, int options, const char *rep);
+
+#endif /* PCRE_SUBST_H */