mirror of
https://github.com/lsdlsd88/JC4827W543.git
synced 2026-05-23 17:17:43 +01:00
initial commit
This commit is contained in:
@@ -0,0 +1,14 @@
|
||||
Regexp
|
||||
======
|
||||
|
||||
Regular expression parser for microcontrollers based on the Lua one.
|
||||
|
||||
Documentation on interfacing with the library, and other details at:
|
||||
|
||||
http://www.gammon.com.au/forum/?id=11063
|
||||
|
||||
## Documentation on regular expressions (Lua patterns)
|
||||
|
||||
* [Official Lua documentation](http://www.lua.org/manual/5.2/manual.html#6.4.1)
|
||||
|
||||
* [Simplified documentation from MUSHclient help](http://www.gammon.com.au/scripts/doc.php?lua=string.find)
|
||||
@@ -0,0 +1 @@
|
||||
#include "src/Regexp.h"
|
||||
@@ -0,0 +1,52 @@
|
||||
#include <Regexp.h>
|
||||
|
||||
// called for each match
|
||||
void match_callback (const char * match, // matching string (not null-terminated)
|
||||
const unsigned int length, // length of matching string
|
||||
const MatchState & ms) // MatchState in use (to get captures)
|
||||
{
|
||||
char cap [10]; // must be large enough to hold captures
|
||||
|
||||
Serial.print ("Matched: ");
|
||||
Serial.write ((byte *) match, length);
|
||||
Serial.println ();
|
||||
|
||||
for (byte i = 0; i < ms.level; i++)
|
||||
{
|
||||
Serial.print ("Capture ");
|
||||
Serial.print (i, DEC);
|
||||
Serial.print (" = ");
|
||||
ms.GetCapture (cap, i);
|
||||
Serial.println (cap);
|
||||
} // end of for each capture
|
||||
|
||||
} // end of match_callback
|
||||
|
||||
|
||||
void setup ()
|
||||
{
|
||||
Serial.begin (115200);
|
||||
Serial.println ();
|
||||
unsigned long count;
|
||||
|
||||
// what we are searching (the target)
|
||||
char buf [100] = "The quick brown fox jumps over the lazy wolf";
|
||||
|
||||
// match state object
|
||||
MatchState ms (buf);
|
||||
|
||||
// original buffer
|
||||
Serial.println (buf);
|
||||
|
||||
// search for three letters followed by a space (two captures)
|
||||
count = ms.GlobalMatch ("(%a+)( )", match_callback);
|
||||
|
||||
// show results
|
||||
Serial.print ("Found ");
|
||||
Serial.print (count); // 8 in this case
|
||||
Serial.println (" matches.");
|
||||
|
||||
|
||||
} // end of setup
|
||||
|
||||
void loop () {}
|
||||
@@ -0,0 +1,61 @@
|
||||
#include <Regexp.h>
|
||||
|
||||
// called for every match
|
||||
void replace_callback (const char * match, // what we found
|
||||
const unsigned int length, // how long it was
|
||||
const char * & replacement, // put replacement here
|
||||
unsigned int & replacement_length, // put replacement length here
|
||||
const MatchState & ms) // for looking up captures
|
||||
{
|
||||
|
||||
// show matching text
|
||||
Serial.print("Match = ");
|
||||
Serial.write((byte *) match, length);
|
||||
Serial.println ();
|
||||
|
||||
replacement = "Nick";
|
||||
replacement_length = 4;
|
||||
} // end of replace_callback
|
||||
|
||||
void setup ()
|
||||
{
|
||||
Serial.begin (115200);
|
||||
Serial.println ();
|
||||
unsigned long count;
|
||||
|
||||
// what we are searching (the target)
|
||||
char buf [100] = "The quick brown fox jumps over the lazy wolf";
|
||||
|
||||
// match state object
|
||||
MatchState ms (buf);
|
||||
|
||||
// original buffer
|
||||
Serial.println (buf);
|
||||
|
||||
// search for three letters
|
||||
count = ms.GlobalReplace ("%a+", replace_callback);
|
||||
|
||||
// show results
|
||||
Serial.print ("Converted string: ");
|
||||
Serial.println (buf);
|
||||
Serial.print ("Found ");
|
||||
Serial.print (count); // 9 in this case
|
||||
Serial.println (" matches.");
|
||||
|
||||
// copy in new target
|
||||
strcpy (buf, "But does it get goat's blood out?");
|
||||
ms.Target (buf); // recompute length
|
||||
|
||||
// replace vowels with *
|
||||
count = ms.GlobalReplace ("[aeiou]", "*");
|
||||
|
||||
// show results
|
||||
Serial.print ("Converted string: ");
|
||||
Serial.println (buf);
|
||||
Serial.print ("Found ");
|
||||
Serial.print (count); // 13 in this case
|
||||
Serial.println (" matches.");
|
||||
|
||||
} // end of setup
|
||||
|
||||
void loop () {}
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
#include <Regexp.h>
|
||||
|
||||
// called for every match
|
||||
void replace_callback (const char * match, // what we found
|
||||
const unsigned int length, // how long it was
|
||||
const char * & replacement, // put replacement here
|
||||
unsigned int & replacement_length, // put replacement length here
|
||||
const MatchState & ms) // for looking up captures
|
||||
{
|
||||
static byte c; // for holding replacement byte, must be static
|
||||
|
||||
char hexdigits [3]; // to hold hex string
|
||||
|
||||
// get first capture
|
||||
ms.GetCapture (hexdigits, 0);
|
||||
// convert from hex to printable
|
||||
c = strtol (hexdigits, NULL, 16);
|
||||
|
||||
// set as replacement
|
||||
replacement = (char *) &c;
|
||||
replacement_length = 1;
|
||||
} // end of replace_callback
|
||||
|
||||
|
||||
void setup ()
|
||||
{
|
||||
Serial.begin (115200);
|
||||
|
||||
// what we are searching
|
||||
char buf [100] = "%7B%22John+Doe%22%7D";
|
||||
|
||||
// for matching regular expressions
|
||||
MatchState ms (buf);
|
||||
|
||||
// easy part, replace + by space
|
||||
ms.GlobalReplace ("%+", " ");
|
||||
|
||||
// replace %xx (eg. %22) by what the hex code represents
|
||||
ms.GlobalReplace ("%%(%x%x)", replace_callback);
|
||||
|
||||
Serial.println (buf);
|
||||
|
||||
} // end of setup
|
||||
|
||||
void loop () {}
|
||||
@@ -0,0 +1,29 @@
|
||||
#include <Regexp.h>
|
||||
|
||||
void setup ()
|
||||
{
|
||||
Serial.begin (115200);
|
||||
|
||||
// match state object
|
||||
MatchState ms;
|
||||
|
||||
// what we are searching (the target)
|
||||
char buf [100] = "The quick brown fox jumps over the lazy wolf";
|
||||
ms.Target (buf); // set its address
|
||||
Serial.println (buf);
|
||||
|
||||
char result = ms.Match ("f.x");
|
||||
|
||||
if (result > 0)
|
||||
{
|
||||
Serial.print ("Found match at: ");
|
||||
Serial.println (ms.MatchStart); // 16 in this case
|
||||
Serial.print ("Match length: ");
|
||||
Serial.println (ms.MatchLength); // 3 in this case
|
||||
}
|
||||
else
|
||||
Serial.println ("No match.");
|
||||
|
||||
} // end of setup
|
||||
|
||||
void loop () {}
|
||||
@@ -0,0 +1,23 @@
|
||||
#include <Regexp.h>
|
||||
|
||||
void setup ()
|
||||
{
|
||||
Serial.begin (115200);
|
||||
|
||||
// match state object
|
||||
MatchState ms;
|
||||
|
||||
// what we are searching (the target)
|
||||
char buf [100] = "The quick brown fox jumps over the lazy wolf";
|
||||
ms.Target (buf); // set its address
|
||||
|
||||
unsigned int count = ms.MatchCount ("[aeiou]");
|
||||
|
||||
Serial.println (buf);
|
||||
Serial.print ("Found ");
|
||||
Serial.print (count); // 11 in this case
|
||||
Serial.println (" matches.");
|
||||
|
||||
} // end of setup
|
||||
|
||||
void loop () {}
|
||||
@@ -0,0 +1,9 @@
|
||||
MatchState KEYWORD1
|
||||
Match KEYWORD2
|
||||
Target KEYWORD2
|
||||
GetMatch KEYWORD2
|
||||
GetCapture KEYWORD2
|
||||
GetResult KEYWORD2
|
||||
MatchCount KEYWORD2
|
||||
GlobalMatch KEYWORD2
|
||||
GlobalReplace KEYWORD2
|
||||
@@ -0,0 +1,9 @@
|
||||
name=Regexp
|
||||
version=0.1.0
|
||||
author=Nick Gammon
|
||||
maintainer=Nick Gammon
|
||||
sentence=Regular expression parser for microcontrollers
|
||||
paragraph=Based upon Lua implementation
|
||||
category=Uncategorized
|
||||
url=https://github.com/nickgammon/Regexp
|
||||
architectures=*
|
||||
@@ -0,0 +1,732 @@
|
||||
/*
|
||||
|
||||
Regular-expression matching library for Arduino.
|
||||
|
||||
Written by Nick Gammon.
|
||||
Date: 30 April 2011
|
||||
|
||||
Heavily based on the Lua regular expression matching library written by Roberto Ierusalimschy.
|
||||
|
||||
Adapted to run on the Arduino by Nick Gammon.
|
||||
|
||||
VERSION
|
||||
|
||||
Version 1.0 - 30th April 2011 : initial release.
|
||||
Version 1.1 - 1st May 2011 : added some helper functions, made more modular.
|
||||
Version 1.2 - 19th May 2011 : added more helper functions for replacing etc.
|
||||
|
||||
|
||||
LICENSE
|
||||
|
||||
|
||||
Copyright © 1994–2010 Lua.org, PUC-Rio.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
||||
USAGE
|
||||
|
||||
|
||||
Find the first match of the regular expression "pattern" in the supplied string, starting at position "index".
|
||||
|
||||
If found, returns REGEXP_MATCHED (1).
|
||||
|
||||
Also match_start and match_len in the MatchState structure are set to the start offset and length of the match.
|
||||
|
||||
The capture in the MatchState structure has the locations and lengths of each capture.
|
||||
|
||||
If not found, returns REGEXP_NOMATCH (0).
|
||||
|
||||
On a parsing error (eg. trailing % symbol) returns a negative number.
|
||||
|
||||
|
||||
EXAMPLE OF CALLING ON THE ARDUINO
|
||||
|
||||
// ------------------------------------- //
|
||||
|
||||
#include <Regexp.h>
|
||||
|
||||
void setup ()
|
||||
{
|
||||
|
||||
Serial.begin (115200);
|
||||
Serial.println ();
|
||||
|
||||
MatchState ms;
|
||||
char buf [100]; // large enough to hold expected string, or malloc it
|
||||
|
||||
// string we are searching
|
||||
ms.Target ("Testing: answer=42");
|
||||
|
||||
// search it
|
||||
char result = ms.Match ("(%a+)=(%d+)", 0);
|
||||
|
||||
// check results
|
||||
|
||||
switch (result)
|
||||
{
|
||||
case REGEXP_MATCHED:
|
||||
|
||||
Serial.println ("-----");
|
||||
Serial.print ("Matched on: ");
|
||||
Serial.println (ms.GetMatch (buf));
|
||||
|
||||
// matching offsets in ms.capture
|
||||
|
||||
Serial.print ("Captures: ");
|
||||
Serial.println (ms.level);
|
||||
|
||||
for (int j = 0; j < ms.level; j++)
|
||||
{
|
||||
Serial.print ("Capture number: ");
|
||||
Serial.println (j + 1, DEC);
|
||||
Serial.print ("Text: '");
|
||||
Serial.print (ms.GetCapture (buf, j));
|
||||
Serial.println ("'");
|
||||
|
||||
}
|
||||
break;
|
||||
|
||||
case REGEXP_NOMATCH:
|
||||
Serial.println ("No match.");
|
||||
break;
|
||||
|
||||
default:
|
||||
Serial.print ("Regexp error: ");
|
||||
Serial.println (result, DEC);
|
||||
break;
|
||||
|
||||
} // end of switch
|
||||
|
||||
} // end of setup
|
||||
|
||||
void loop () {} // end of loop
|
||||
|
||||
// ------------------------------------- //
|
||||
|
||||
|
||||
PATTERNS
|
||||
|
||||
Patterns
|
||||
|
||||
The standard patterns (character classes) you can search for are:
|
||||
|
||||
|
||||
. --- (a dot) represents all characters.
|
||||
%a --- all letters.
|
||||
%c --- all control characters.
|
||||
%d --- all digits.
|
||||
%l --- all lowercase letters.
|
||||
%p --- all punctuation characters.
|
||||
%s --- all space characters.
|
||||
%u --- all uppercase letters.
|
||||
%w --- all alphanumeric characters.
|
||||
%x --- all hexadecimal digits.
|
||||
%z --- the character with hex representation 0x00 (null).
|
||||
%% --- a single '%' character.
|
||||
|
||||
%1 --- captured pattern 1.
|
||||
%2 --- captured pattern 2 (and so on).
|
||||
%f[s] transition from not in set 's' to in set 's'.
|
||||
%b() balanced pair ( ... )
|
||||
|
||||
|
||||
Important! - the uppercase versions of the above represent the complement of the class.
|
||||
eg. %U represents everything except uppercase letters, %D represents everything except digits.
|
||||
|
||||
There are some "magic characters" (such as %) that have special meanings. These are:
|
||||
|
||||
|
||||
^ $ ( ) % . [ ] * + - ?
|
||||
|
||||
|
||||
If you want to use those in a pattern (as themselves) you must precede them by a % symbol.
|
||||
|
||||
eg. %% would match a single %
|
||||
|
||||
You can build your own pattern classes (sets) by using square brackets, eg.
|
||||
|
||||
|
||||
[abc] ---> matches a, b or c
|
||||
[a-z] ---> matches lowercase letters (same as %l)
|
||||
[^abc] ---> matches anything except a, b or c
|
||||
[%a%d] ---> matches all letters and digits
|
||||
|
||||
[%a%d_] ---> matches all letters, digits and underscore
|
||||
[%[%]] ---> matches square brackets (had to escape them with %)
|
||||
|
||||
|
||||
You can use pattern classes in the form %x in the set.
|
||||
If you use other characters (like periods and brackets, etc.) they are simply themselves.
|
||||
|
||||
You can specify a range of character inside a set by using simple characters (not pattern classes like %a) separated by a hyphen.
|
||||
For example, [A-Z] or [0-9]. These can be combined with other things. For example [A-Z0-9] or [A-Z,.].
|
||||
|
||||
A end-points of a range must be given in ascending order. That is, [A-Z] would match upper-case letters, but [Z-A] would not match anything.
|
||||
|
||||
You can negate a set by starting it with a "^" symbol, thus [^0-9] is everything except the digits 0 to 9.
|
||||
The negation applies to the whole set, so [^%a%d] would match anything except letters or digits.
|
||||
In anywhere except the first position of a set, the "^" symbol is simply itself.
|
||||
|
||||
Inside a set (that is a sequence delimited by square brackets) the only "magic" characters are:
|
||||
|
||||
] ---> to end the set, unless preceded by %
|
||||
% ---> to introduce a character class (like %a), or magic character (like "]")
|
||||
^ ---> in the first position only, to negate the set (eg. [^A-Z)
|
||||
- ---> between two characters, to specify a range (eg. [A-F])
|
||||
|
||||
|
||||
Thus, inside a set, characters like "." and "?" are just themselves.
|
||||
|
||||
The repetition characters, which can follow a character, class or set, are:
|
||||
|
||||
|
||||
+ ---> 1 or more repetitions (greedy)
|
||||
* ---> 0 or more repetitions (greedy)
|
||||
|
||||
- ---> 0 or more repetitions (non greedy)
|
||||
? ---> 0 or 1 repetition only
|
||||
|
||||
|
||||
A "greedy" match will match on as many characters as possible, a non-greedy one will match on as few as possible.
|
||||
|
||||
The standard "anchor" characters apply:
|
||||
|
||||
|
||||
^ ---> anchor to start of subject string
|
||||
$ ---> anchor to end of subject string
|
||||
|
||||
|
||||
You can also use round brackets to specify "captures":
|
||||
|
||||
|
||||
You see (.*) here
|
||||
|
||||
|
||||
Here, whatever matches (.*) becomes the first pattern.
|
||||
|
||||
You can also refer to matched substrings (captures) later on in an expression:
|
||||
|
||||
eg. This would match:
|
||||
|
||||
string = "You see dogs and dogs"
|
||||
regexp = "You see (.*) and %1"
|
||||
|
||||
|
||||
This example shows how you can look for a repetition of a word matched earlier, whatever that word was ("dogs" in this case).
|
||||
|
||||
As a special case, an empty capture string returns as the captured pattern, the position of itself in the string. eg.
|
||||
|
||||
string = "You see dogs and dogs"
|
||||
regexp = "You .* ()dogs .*"
|
||||
|
||||
This would return a capture with an offset of 8, and a length of CAP_POSITION (-2)
|
||||
|
||||
Finally you can look for nested "balanced" things (such as parentheses) by using %b, like this:
|
||||
|
||||
|
||||
string = "I see a (big fish (swimming) in the pond) here"
|
||||
regexp = "%b()"
|
||||
|
||||
|
||||
After %b you put 2 characters, which indicate the start and end of the balanced pair.
|
||||
If it finds a nested version it keeps processing until we are back at the top level.
|
||||
In this case the matching string was "(big fish (swimming) in the pond)".
|
||||
|
||||
|
||||
*/
|
||||
|
||||
|
||||
#include <setjmp.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include "Regexp.h"
|
||||
|
||||
// for throwing errors
|
||||
static jmp_buf regexp_error_return;
|
||||
typedef unsigned char byte;
|
||||
|
||||
// error codes raised during regexp processing
|
||||
static byte error (const char err)
|
||||
{
|
||||
// does not return
|
||||
longjmp (regexp_error_return, err);
|
||||
return 0; // keep compiler happy
|
||||
} // end of error
|
||||
|
||||
static int check_capture (MatchState *ms, int l) {
|
||||
l -= '1';
|
||||
if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
|
||||
return error(ERR_INVALID_CAPTURE_INDEX);
|
||||
return l;
|
||||
} // end of check_capture
|
||||
|
||||
static int capture_to_close (MatchState *ms) {
|
||||
int level = ms->level;
|
||||
for (level--; level>=0; level--)
|
||||
if (ms->capture[level].len == CAP_UNFINISHED) return level;
|
||||
return error(ERR_INVALID_PATTERN_CAPTURE);
|
||||
} // end of capture_to_close
|
||||
|
||||
static const char *classend (MatchState *ms, const char *p) {
|
||||
switch (*p++) {
|
||||
case REGEXP_ESC: {
|
||||
if (*p == '\0')
|
||||
error(ERR_MALFORMED_PATTERN_ENDS_WITH_ESCAPE);
|
||||
return p+1;
|
||||
}
|
||||
case '[': {
|
||||
if (*p == '^') p++;
|
||||
do { /* look for a `]' */
|
||||
if (*p == '\0')
|
||||
error(ERR_MALFORMED_PATTERN_ENDS_WITH_RH_SQUARE_BRACKET);
|
||||
if (*(p++) == REGEXP_ESC && *p != '\0')
|
||||
p++; /* skip escapes (e.g. `%]') */
|
||||
} while (*p != ']');
|
||||
return p+1;
|
||||
}
|
||||
default: {
|
||||
return p;
|
||||
}
|
||||
}
|
||||
} // end of classend
|
||||
|
||||
|
||||
static int match_class (int c, int cl) {
|
||||
int res;
|
||||
switch (tolower(cl)) {
|
||||
case 'a' : res = isalpha(c); break;
|
||||
case 'c' : res = iscntrl(c); break;
|
||||
case 'd' : res = isdigit(c); break;
|
||||
case 'l' : res = islower(c); break;
|
||||
case 'p' : res = ispunct(c); break;
|
||||
case 's' : res = isspace(c); break;
|
||||
case 'u' : res = isupper(c); break;
|
||||
case 'w' : res = isalnum(c); break;
|
||||
case 'x' : res = isxdigit(c); break;
|
||||
case 'z' : res = (c == 0); break;
|
||||
default: return (cl == c);
|
||||
}
|
||||
return (islower(cl) ? res : !res);
|
||||
} // end of match_class
|
||||
|
||||
|
||||
static int matchbracketclass (int c, const char *p, const char *ec) {
|
||||
int sig = 1;
|
||||
if (*(p+1) == '^') {
|
||||
sig = 0;
|
||||
p++; /* skip the `^' */
|
||||
}
|
||||
while (++p < ec) {
|
||||
if (*p == REGEXP_ESC) {
|
||||
p++;
|
||||
if (match_class(c, uchar(*p)))
|
||||
return sig;
|
||||
}
|
||||
else if ((*(p+1) == '-') && (p+2 < ec)) {
|
||||
p+=2;
|
||||
if (uchar(*(p-2)) <= c && c <= uchar(*p))
|
||||
return sig;
|
||||
}
|
||||
else if (uchar(*p) == c) return sig;
|
||||
}
|
||||
return !sig;
|
||||
} // end of matchbracketclass
|
||||
|
||||
|
||||
static int singlematch (int c, const char *p, const char *ep) {
|
||||
switch (*p) {
|
||||
case '.': return 1; /* matches any char */
|
||||
case REGEXP_ESC: return match_class(c, uchar(*(p+1)));
|
||||
case '[': return matchbracketclass(c, p, ep-1);
|
||||
default: return (uchar(*p) == c);
|
||||
}
|
||||
} // end of singlematch
|
||||
|
||||
|
||||
static const char *match (MatchState *ms, const char *s, const char *p);
|
||||
|
||||
|
||||
static const char *matchbalance (MatchState *ms, const char *s,
|
||||
const char *p) {
|
||||
if (*p == 0 || *(p+1) == 0)
|
||||
error(ERR_UNBALANCED_PATTERN);
|
||||
if (*s != *p) return NULL;
|
||||
else {
|
||||
int b = *p;
|
||||
int e = *(p+1);
|
||||
int cont = 1;
|
||||
while (++s < ms->src_end) {
|
||||
if (*s == e) {
|
||||
if (--cont == 0) return s+1;
|
||||
}
|
||||
else if (*s == b) cont++;
|
||||
}
|
||||
}
|
||||
return NULL; /* string ends out of balance */
|
||||
} // end of matchbalance
|
||||
|
||||
|
||||
static const char *max_expand (MatchState *ms, const char *s,
|
||||
const char *p, const char *ep) {
|
||||
int i = 0; /* counts maximum expand for item */
|
||||
while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep))
|
||||
i++;
|
||||
/* keeps trying to match with the maximum repetitions */
|
||||
while (i>=0) {
|
||||
const char *res = match(ms, (s+i), ep+1);
|
||||
if (res) return res;
|
||||
i--; /* else didn't match; reduce 1 repetition to try again */
|
||||
}
|
||||
return NULL;
|
||||
} // end of max_expand
|
||||
|
||||
|
||||
static const char *min_expand (MatchState *ms, const char *s,
|
||||
const char *p, const char *ep) {
|
||||
for (;;) {
|
||||
const char *res = match(ms, s, ep+1);
|
||||
if (res != NULL)
|
||||
return res;
|
||||
else if (s<ms->src_end && singlematch(uchar(*s), p, ep))
|
||||
s++; /* try with one more repetition */
|
||||
else return NULL;
|
||||
}
|
||||
} // end of min_expand
|
||||
|
||||
|
||||
static const char *start_capture (MatchState *ms, const char *s,
|
||||
const char *p, int what) {
|
||||
const char *res;
|
||||
int level = ms->level;
|
||||
if (level >= MAXCAPTURES) error(ERR_TOO_MANY_CAPTURES);
|
||||
ms->capture[level].init = s;
|
||||
ms->capture[level].len = what;
|
||||
ms->level = level+1;
|
||||
if ((res=match(ms, s, p)) == NULL) /* match failed? */
|
||||
ms->level--; /* undo capture */
|
||||
return res;
|
||||
} // end of start_capture
|
||||
|
||||
|
||||
static const char *end_capture (MatchState *ms, const char *s,
|
||||
const char *p) {
|
||||
int l = capture_to_close(ms);
|
||||
const char *res;
|
||||
ms->capture[l].len = s - ms->capture[l].init; /* close capture */
|
||||
if ((res = match(ms, s, p)) == NULL) /* match failed? */
|
||||
ms->capture[l].len = CAP_UNFINISHED; /* undo capture */
|
||||
return res;
|
||||
} // end of end_capture
|
||||
|
||||
|
||||
static const char *match_capture (MatchState *ms, const char *s, int l) {
|
||||
size_t len;
|
||||
l = check_capture(ms, l);
|
||||
len = ms->capture[l].len;
|
||||
if ((size_t)(ms->src_end-s) >= len &&
|
||||
memcmp(ms->capture[l].init, s, len) == 0)
|
||||
return s+len;
|
||||
else return NULL;
|
||||
} // end of match_capture
|
||||
|
||||
|
||||
static const char *match (MatchState *ms, const char *s, const char *p) {
|
||||
init: /* using goto's to optimize tail recursion */
|
||||
switch (*p) {
|
||||
case '(': { /* start capture */
|
||||
if (*(p+1) == ')') /* position capture? */
|
||||
return start_capture(ms, s, p+2, CAP_POSITION);
|
||||
else
|
||||
return start_capture(ms, s, p+1, CAP_UNFINISHED);
|
||||
}
|
||||
case ')': { /* end capture */
|
||||
return end_capture(ms, s, p+1);
|
||||
}
|
||||
case REGEXP_ESC: {
|
||||
switch (*(p+1)) {
|
||||
case 'b': { /* balanced string? */
|
||||
s = matchbalance(ms, s, p+2);
|
||||
if (s == NULL) return NULL;
|
||||
p+=4; goto init; /* else return match(ms, s, p+4); */
|
||||
}
|
||||
case 'f': { /* frontier? */
|
||||
const char *ep; char previous;
|
||||
p += 2;
|
||||
if (*p != '[')
|
||||
error(ERR_MISSING_LH_SQUARE_BRACKET_AFTER_ESC_F);
|
||||
ep = classend(ms, p); /* points to what is next */
|
||||
previous = (s == ms->src) ? '\0' : *(s-1);
|
||||
if (matchbracketclass(uchar(previous), p, ep-1) ||
|
||||
!matchbracketclass(uchar(*s), p, ep-1)) return NULL;
|
||||
p=ep; goto init; /* else return match(ms, s, ep); */
|
||||
}
|
||||
default: {
|
||||
if (isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */
|
||||
s = match_capture(ms, s, uchar(*(p+1)));
|
||||
if (s == NULL) return NULL;
|
||||
p+=2; goto init; /* else return match(ms, s, p+2) */
|
||||
}
|
||||
goto dflt; /* case default */
|
||||
}
|
||||
}
|
||||
}
|
||||
case '\0': { /* end of pattern */
|
||||
return s; /* match succeeded */
|
||||
}
|
||||
case '$': {
|
||||
if (*(p+1) == '\0') /* is the `$' the last char in pattern? */
|
||||
return (s == ms->src_end) ? s : NULL; /* check end of string */
|
||||
else goto dflt;
|
||||
}
|
||||
default: dflt: { /* it is a pattern item */
|
||||
const char *ep = classend(ms, p); /* points to what is next */
|
||||
int m = s<ms->src_end && singlematch(uchar(*s), p, ep);
|
||||
switch (*ep) {
|
||||
case '?': { /* optional */
|
||||
const char *res;
|
||||
if (m && ((res=match(ms, s+1, ep+1)) != NULL))
|
||||
return res;
|
||||
p=ep+1; goto init; /* else return match(ms, s, ep+1); */
|
||||
}
|
||||
case '*': { /* 0 or more repetitions */
|
||||
return max_expand(ms, s, p, ep);
|
||||
}
|
||||
case '+': { /* 1 or more repetitions */
|
||||
return (m ? max_expand(ms, s+1, p, ep) : NULL);
|
||||
}
|
||||
case '-': { /* 0 or more repetitions (minimum) */
|
||||
return min_expand(ms, s, p, ep);
|
||||
}
|
||||
default: {
|
||||
if (!m) return NULL;
|
||||
s++; p=ep; goto init; /* else return match(ms, s+1, ep); */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end of match
|
||||
|
||||
|
||||
// functions below written by Nick Gammon ...
|
||||
|
||||
char MatchState::Match (const char * pattern, unsigned int index)
|
||||
{
|
||||
// set up for throwing errors
|
||||
char rtn = setjmp (regexp_error_return);
|
||||
|
||||
// error return
|
||||
if (rtn)
|
||||
return ((result = rtn));
|
||||
|
||||
if (!src)
|
||||
error (ERR_NO_TARGET_STRING);
|
||||
|
||||
if (index > src_len)
|
||||
index = src_len;
|
||||
|
||||
int anchor = (*pattern == '^') ? (pattern++, 1) : 0;
|
||||
const char *s1 =src + index;
|
||||
src_end = src + src_len;
|
||||
|
||||
// iterate through target string, character by character unless anchored
|
||||
do {
|
||||
const char *res;
|
||||
level = 0;
|
||||
if ((res=match(this, s1, pattern)) != NULL)
|
||||
{
|
||||
MatchStart = s1 - src;
|
||||
MatchLength = res - s1;
|
||||
return (result = REGEXP_MATCHED);
|
||||
} // end of match at this position
|
||||
} while (s1++ < src_end && !anchor);
|
||||
|
||||
return (result = REGEXP_NOMATCH); // no match
|
||||
|
||||
} // end of regexp
|
||||
|
||||
// set up the target string
|
||||
void MatchState::Target (char * s)
|
||||
{
|
||||
Target (s, strlen (s));
|
||||
} // end of MatchState::Target
|
||||
|
||||
void MatchState::Target (char * s, const unsigned int len)
|
||||
{
|
||||
src = s;
|
||||
src_len = len;
|
||||
result = REGEXP_NOMATCH;
|
||||
} // end of MatchState::Target
|
||||
|
||||
// copy the match string to user-supplied buffer
|
||||
// buffer must be large enough to hold it
|
||||
char * MatchState::GetMatch (char * s) const
|
||||
{
|
||||
if (result != REGEXP_MATCHED)
|
||||
s [0] = 0;
|
||||
else
|
||||
{
|
||||
memcpy (s, &src [MatchStart], MatchLength);
|
||||
s [MatchLength] = 0; // null-terminated string
|
||||
}
|
||||
return s;
|
||||
} // end of MatchState::GetMatch
|
||||
|
||||
// get one of the capture strings (zero-relative level)
|
||||
// buffer must be large enough to hold it
|
||||
char * MatchState::GetCapture (char * s, const int n) const
|
||||
{
|
||||
if (result != REGEXP_MATCHED || n >= level || capture [n].len <= 0)
|
||||
s [0] = 0;
|
||||
else
|
||||
{
|
||||
memcpy (s, capture [n].init, capture [n].len);
|
||||
s [capture [n].len] = 0; // null-terminated string
|
||||
}
|
||||
return s;
|
||||
} // end of MatchState::GetCapture
|
||||
|
||||
// match repeatedly on a string, return count of matches
|
||||
unsigned int MatchState::MatchCount (const char * pattern)
|
||||
{
|
||||
unsigned int count = 0;
|
||||
|
||||
// keep matching until we run out of matches
|
||||
for (unsigned int index = 0;
|
||||
Match (pattern, index) > 0 &&
|
||||
index < src_len; // otherwise empty matches loop
|
||||
count++)
|
||||
// increment index ready for next time, go forwards at least one byte
|
||||
index = MatchStart + (MatchLength == 0 ? 1 : MatchLength);
|
||||
|
||||
return count;
|
||||
|
||||
} // end of MatchState::MatchCount
|
||||
|
||||
// match repeatedly on a string, call function f for each match
|
||||
unsigned int MatchState::GlobalMatch (const char * pattern, GlobalMatchCallback f)
|
||||
{
|
||||
unsigned int count = 0;
|
||||
|
||||
// keep matching until we run out of matches
|
||||
for (unsigned int index = 0;
|
||||
Match (pattern, index) > 0;
|
||||
count++)
|
||||
{
|
||||
f (& src [MatchStart], MatchLength, *this);
|
||||
// increment index ready for next time, go forwards at least one byte
|
||||
index = MatchStart + (MatchLength == 0 ? 1 : MatchLength);
|
||||
} // end of for each match
|
||||
return count;
|
||||
|
||||
} // end of MatchState::GlobalMatch
|
||||
|
||||
// match repeatedly on a string, call function f for each match
|
||||
// f sets replacement string, incorporate replacement and continue
|
||||
// maximum of max_count replacements if max_count > 0
|
||||
// replacement string in GlobalReplaceCallback must stay in scope (eg. static string or literal)
|
||||
unsigned int MatchState::GlobalReplace (const char * pattern, GlobalReplaceCallback f, const unsigned int max_count)
|
||||
{
|
||||
unsigned int count = 0;
|
||||
|
||||
// keep matching until we run out of matches
|
||||
for (unsigned int index = 0;
|
||||
Match (pattern, index) > 0 && // stop when no match
|
||||
index < src_len && // otherwise empty matches loop
|
||||
(max_count == 0 || count < max_count); // stop when count reached
|
||||
count++)
|
||||
{
|
||||
// default is to replace with self
|
||||
const char * replacement = &src [MatchStart];
|
||||
unsigned int replacement_length = MatchLength;
|
||||
|
||||
// increment index ready for next time, go forwards at least one byte
|
||||
if (MatchLength == 0)
|
||||
index = MatchStart + 1; // go forwards at least one byte or we will loop forever
|
||||
else
|
||||
{
|
||||
// increment index ready for next time,
|
||||
index = MatchStart + MatchLength;
|
||||
|
||||
// call function to find replacement text
|
||||
f (&src [MatchStart], MatchLength, replacement, replacement_length, *this);
|
||||
|
||||
// see how much memory we need to move
|
||||
int lengthDiff = MatchLength - replacement_length;
|
||||
|
||||
// copy the rest of the buffer backwards/forwards to allow for the length difference
|
||||
memmove (&src [index - lengthDiff], &src [index], src_len - index);
|
||||
|
||||
// copy in the replacement
|
||||
memmove (&src [MatchStart], replacement, replacement_length);
|
||||
|
||||
// adjust the index for the next search
|
||||
index -= lengthDiff;
|
||||
// and the length of the source
|
||||
src_len -= lengthDiff;
|
||||
} // end if matching at least one byte
|
||||
} // end of for each match
|
||||
|
||||
// put a terminating null in
|
||||
src [src_len] = 0;
|
||||
return count;
|
||||
} // end of MatchState::GlobalReplace
|
||||
|
||||
|
||||
// match repeatedly on a string, replaces with replacement string for each match
|
||||
// maximum of max_count replacements if max_count > 0
|
||||
// replacement string in GlobalReplaceCallback must stay in scope (eg. static string or literal)
|
||||
unsigned int MatchState::GlobalReplace (const char * pattern, const char * replacement, const unsigned int max_count)
|
||||
{
|
||||
unsigned int count = 0;
|
||||
unsigned int replacement_length = strlen (replacement);
|
||||
|
||||
// keep matching until we run out of matches
|
||||
for (unsigned int index = 0;
|
||||
Match (pattern, index) > 0 && // stop when no match
|
||||
index < src_len && // otherwise empty matches loop
|
||||
(max_count == 0 || count < max_count); // stop when count reached
|
||||
count++)
|
||||
{
|
||||
if (MatchLength == 0)
|
||||
index = MatchStart + 1; // go forwards at least one byte or we will loop forever
|
||||
else
|
||||
{
|
||||
// increment index ready for next time,
|
||||
index = MatchStart + MatchLength;
|
||||
|
||||
// see how much memory we need to move
|
||||
int lengthDiff = MatchLength - replacement_length;
|
||||
|
||||
// copy the rest of the buffer backwards/forwards to allow for the length difference
|
||||
memmove (&src [index - lengthDiff], &src [index], src_len - index);
|
||||
|
||||
// copy in the replacement
|
||||
memmove (&src [MatchStart], replacement, replacement_length);
|
||||
|
||||
// adjust the index for the next search
|
||||
index -= lengthDiff;
|
||||
// and the length of the source
|
||||
src_len -= lengthDiff;
|
||||
} // end if matching at least one byte
|
||||
|
||||
} // end of for each match
|
||||
|
||||
// put a terminating null in
|
||||
src [src_len] = 0;
|
||||
return count;
|
||||
} // end of MatchState::GlobalReplace
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
|
||||
Regular-expression matching library for Arduino.
|
||||
|
||||
Written by Nick Gammon.
|
||||
Date: 30 April 2011
|
||||
|
||||
Heavily based on the Lua regular expression matching library written by Roberto Ierusalimschy.
|
||||
|
||||
Adapted to run on the Arduino by Nick Gammon.
|
||||
|
||||
|
||||
VERSION
|
||||
|
||||
Version 1.0 - 30th April 2011 : initial release.
|
||||
Version 1.1 - 1st May 2011 : added some helper functions, made more modular.
|
||||
Version 1.2 - 19th May 2011 : added more helper functions for replacing etc.
|
||||
|
||||
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
// Maximum of captures we can return.
|
||||
// Increase if you need more, decrease to save memory.
|
||||
#define MAXCAPTURES 32
|
||||
|
||||
// the "magic escape" character
|
||||
#define REGEXP_ESC '%'
|
||||
|
||||
// special characters that have to be escaped
|
||||
// (not used in the library, but you might need this)
|
||||
#define REGEXP_SPECIALS "^$*+?.([%-"
|
||||
|
||||
// Result codes from calling regexp:
|
||||
|
||||
// we got a match
|
||||
#define REGEXP_MATCHED 1
|
||||
|
||||
// no match, or not attempted to match yet
|
||||
#define REGEXP_NOMATCH 0
|
||||
|
||||
// errors when matching
|
||||
#define ERR_INVALID_CAPTURE_INDEX -1
|
||||
#define ERR_INVALID_PATTERN_CAPTURE -2
|
||||
#define ERR_MALFORMED_PATTERN_ENDS_WITH_ESCAPE -3
|
||||
#define ERR_MALFORMED_PATTERN_ENDS_WITH_RH_SQUARE_BRACKET -4
|
||||
#define ERR_UNBALANCED_PATTERN -5
|
||||
#define ERR_TOO_MANY_CAPTURES -6
|
||||
#define ERR_MISSING_LH_SQUARE_BRACKET_AFTER_ESC_F -7
|
||||
#define ERR_NO_TARGET_STRING -8
|
||||
|
||||
|
||||
/* macro to `unsign' a character */
|
||||
#define uchar(c) ((unsigned char)(c))
|
||||
|
||||
// special capture "lengths"
|
||||
#define CAP_UNFINISHED (-1)
|
||||
#define CAP_POSITION (-2)
|
||||
|
||||
class MatchState; // forward definition for the callback routines
|
||||
|
||||
typedef void (*GlobalMatchCallback) (const char * match, // matching string (not null-terminated)
|
||||
const unsigned int length, // length of matching string
|
||||
const MatchState & ms); // MatchState in use (to get captures)
|
||||
typedef void (*GlobalReplaceCallback) (const char * match, // matching string (not null-terminated)
|
||||
const unsigned int length, // length of matching string
|
||||
const char * & replacement,
|
||||
unsigned int & replacement_length,
|
||||
const MatchState & ms); // MatchState in use (to get captures)
|
||||
typedef class MatchState {
|
||||
private:
|
||||
|
||||
char result; // result of last Match call
|
||||
|
||||
public:
|
||||
|
||||
MatchState () : result (REGEXP_NOMATCH), src (0) {}; // constructor
|
||||
MatchState (char * s) : result (REGEXP_NOMATCH)
|
||||
{ Target (s); }; // constructor from null-terminated string
|
||||
MatchState (char * s, const unsigned int len) : result (REGEXP_NOMATCH)
|
||||
{ Target (s, len); }; // constructor from string and length
|
||||
|
||||
// supply these two:
|
||||
char *src; /* source string */
|
||||
unsigned int src_len; /* length of source string */
|
||||
|
||||
// used internally
|
||||
char *src_end; /* end of source string */
|
||||
|
||||
// returned fields:
|
||||
|
||||
unsigned int MatchStart; // zero-relative offset of start of match
|
||||
unsigned int MatchLength; // length of match
|
||||
|
||||
int level; /* total number of captures in array below (finished or unfinished) */
|
||||
|
||||
// capture addresses and lengths
|
||||
struct {
|
||||
const char *init;
|
||||
int len; // might be CAP_UNFINISHED or CAP_POSITION
|
||||
} capture[MAXCAPTURES];
|
||||
|
||||
// add target string, null-terminated
|
||||
void Target (char * s);
|
||||
// add target string, with specified length
|
||||
void Target (char * s, const unsigned int len);
|
||||
// do a match on a supplied pattern and zero-relative starting point
|
||||
char Match (const char * pattern, unsigned int index = 0);
|
||||
// return the matching string
|
||||
char * GetMatch (char * s) const;
|
||||
// return capture string n
|
||||
char * GetCapture (char * s, const int n) const;
|
||||
// get result of previous match
|
||||
char GetResult () const { return result; }
|
||||
|
||||
// count number of matches on a supplied pattern
|
||||
unsigned int MatchCount (const char * pattern);
|
||||
// iterate with a supplied pattern, call function f for each match
|
||||
// returns count of matches
|
||||
unsigned int GlobalMatch (const char * pattern, GlobalMatchCallback f);
|
||||
// iterate with a supplied pattern, call function f for each match, maximum of max_count matches if max_count > 0
|
||||
// returns count of replacements
|
||||
unsigned int GlobalReplace (const char * pattern, GlobalReplaceCallback f, const unsigned int max_count = 0);
|
||||
// iterate with a supplied pattern, replaces with replacement string, maximum of max_count matches if max_count > 0
|
||||
// returns count of replacements
|
||||
unsigned int GlobalReplace (const char * pattern, const char * replacement, const unsigned int max_count = 0);
|
||||
|
||||
} MatchState;
|
||||
|
||||
Reference in New Issue
Block a user