C中的正则表达式:例子?

我想找一些简单的例子,以及在ANSI C中使用正则表达式的最佳实践。man regex.h提供的帮助并不多。

413028 次浏览

正则表达式实际上不是ANSI C的一部分。听起来你说的可能是POSIX正则表达式库,大多数(所有?)*nix系统都自带它。下面是一个在C语言中使用POSIX正则表达式的例子:

#include <regex.h>
#include <stdio.h>   /* fprintf, puts */
#include <stdlib.h>  /* exit */

/*
 * Fragment: compile a POSIX basic regular expression, test one string
 * against it, report the outcome, and release the compiled pattern.
 * The statements below are meant to be placed inside a function body.
 */
regex_t regex;      /* compiled pattern */
int reti;           /* return code of the last regcomp()/regexec() call */
char msgbuf[100];   /* receives the regerror() text on failure */


/* Compile regular expression */
reti = regcomp(&regex, "^a[[:alnum:]]", 0);
if (reti) {
    fprintf(stderr, "Could not compile regex\n");
    exit(1);
}


/* Execute regular expression */
reti = regexec(&regex, "abc", 0, NULL, 0);
if (!reti) {
    puts("Match");
}
else if (reti == REG_NOMATCH) {
    puts("No match");
}
else {
    /* Any other code is a real error: turn it into text before exiting. */
    regerror(reti, &regex, msgbuf, sizeof(msgbuf));
    fprintf(stderr, "Regex match failed: %s\n", msgbuf);
    /* Release the compiled pattern on the error path as well. */
    regfree(&regex);
    exit(1);
}


/* Free memory allocated to the pattern buffer by regcomp() */
regfree(&regex);

或者,您可能想要查看PCRE,这是一个在C语言中使用与Perl兼容的正则表达式的库。Perl语法与Java、Python和许多其他语言中使用的语法几乎相同。POSIX语法是grep、sed、vi等使用的语法。

这可能不是你想要的,但是像re2c这样的工具可以将POSIX(-ish)正则表达式编译为ANSI c。它是作为lex的替代品编写的,但是如果你真的需要它,这种方法允许你牺牲灵活性和易读性来换取最后一点速度。

man regex.h显示没有regex.h的手册条目,但是man 3 regex会显示一个页面,解释用于模式匹配的POSIX函数。
《GNU C库:正则表达式匹配》中描述了相同的函数,其中解释了GNU C库既支持POSIX.2接口,也支持GNU C库多年来一直拥有的接口。

例如,对于一个假设的程序,它输出作为参数传递的字符串中哪一个与作为第一个参数传递的模式匹配,您可以使用类似于下面的代码。

#include <errno.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
   

void print_regerror (int errcode, size_t length, regex_t *compiled);


/*
 * Compile argv[1] as a POSIX extended regular expression and report, for
 * every following argument, whether it matches the pattern.
 *
 * Returns EXIT_FAILURE when fewer than two arguments are given, when the
 * pattern cannot be compiled, or when regexec() reports an error other
 * than REG_NOMATCH; returns EXIT_SUCCESS otherwise.
 */
int
main (int argc, char *argv[])
{
  regex_t regex;
  int status = EXIT_SUCCESS;

  if (argc < 3)
    {
      // The number of passed arguments is lower than the number of
      // expected arguments.
      fputs ("Missing command line arguments\n", stderr);
      return EXIT_FAILURE;
    }

  int result = regcomp (&regex, argv[1], REG_EXTENDED);
  if (result)
    {
      // Any value different from 0 means it was not possible to
      // compile the regular expression, either for memory problems
      // or problems with the regular expression syntax.
      // The pattern buffer is not valid after a failed regcomp(), so
      // regfree() is deliberately not called here.
      if (result == REG_ESPACE)
        fprintf (stderr, "%s\n", strerror(ENOMEM));
      else
        fputs ("Syntax error in the regular expression passed as first argument\n", stderr);
      return EXIT_FAILURE;
    }

  for (int i = 2; i < argc; i++)
    {
      result = regexec (&regex, argv[i], 0, NULL, 0);
      if (result == 0)
        {
          printf ("'%s' matches the regular expression\n", argv[i]);
        }
      else if (result == REG_NOMATCH)
        {
          printf ("'%s' doesn't match the regular expression\n", argv[i]);
        }
      else
        {
          // The function returned an error; print the string
          // describing it.
          // Get the size of the buffer required for the error message.
          size_t length = regerror (result, &regex, NULL, 0);
          print_regerror (result, length, &regex);
          // Leave the loop instead of returning so that the compiled
          // pattern is still released below.
          status = EXIT_FAILURE;
          break;
        }
    }

  /* Free the memory allocated from regcomp(). */
  regfree (&regex);
  return status;
}


/*
 * Print to stderr the regerror() description of errcode for the given
 * compiled pattern.
 *
 * errcode   error code returned by regcomp() or regexec()
 * length    size in bytes of the buffer to use for the message, normally
 *           the value returned by regerror (errcode, compiled, NULL, 0);
 *           when 0, the required size is queried here instead
 * compiled  the pattern associated with errcode
 *
 * The message buffer is heap-allocated rather than a variable-length
 * array, so a zero or very large length cannot corrupt the stack. If no
 * buffer can be obtained, the numeric error code is printed instead.
 */
void
print_regerror (int errcode, size_t length, regex_t *compiled)
{
  if (length == 0)
    length = regerror (errcode, compiled, NULL, 0);

  char *buffer = length ? malloc (length) : NULL;
  if (buffer == NULL)
    {
      fprintf (stderr, "Regex match failed: error code %d\n", errcode);
      return;
    }

  (void) regerror (errcode, compiled, buffer, length);
  fprintf (stderr, "Regex match failed: %s\n", buffer);
  free (buffer);
}

regcomp()的最后一个参数至少需要包含REG_EXTENDED,否则函数将使用基本正则表达式,这意味着(例如)你需要写a\{3\},而不是扩展正则表达式中的a{3}——后者很可能才是你期望使用的写法。

POSIX.2还有另一个用于通配符匹配的函数:fnmatch()。它不能编译正则表达式,也不能获取匹配子表达式的子字符串,但它专门用于检查文件名是否匹配通配符(例如,它支持FNM_PATHNAME标志)。

虽然上面的答案很好,但我建议使用PCRE2。这意味着你现在可以使用所有的正则表达式示例,而不必从一些古老的正则表达式翻译过来。

我已经对此做了一个回答,但我认为它在这里也有帮助。

Regex In C To Search For Credit Card Numbers.

// YOU MUST SPECIFY THE UNIT WIDTH BEFORE THE INCLUDE OF pcre2.h


#define PCRE2_CODE_UNIT_WIDTH 8
#include <stdio.h>
#include <string.h>
#include <pcre2.h>
#include <stdbool.h>


int main(){


bool Debug = true;
bool Found = false;
pcre2_code *re;
PCRE2_SPTR pattern;
PCRE2_SPTR subject;
int errornumber;
int i;
int rc;
PCRE2_SIZE erroroffset;
PCRE2_SIZE *ovector;
size_t subject_length;
pcre2_match_data *match_data;




char * RegexStr = "(?:\\D|^)(5[1-5][0-9]{2}(?:\\ |\\-|)[0-9]{4}(?:\\ |\\-|)[0-9]{4}(?:\\ |\\-|)[0-9]{4})(?:\\D|$)";
char * source = "5111 2222 3333 4444";


pattern = (PCRE2_SPTR)RegexStr;// <<<<< This is where you pass your REGEX
subject = (PCRE2_SPTR)source;// <<<<< This is where you pass your bufer that will be checked.
subject_length = strlen((char *)subject);








re = pcre2_compile(
pattern,               /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0,                     /* default options */
&errornumber,          /* for error number */
&erroroffset,          /* for error offset */
NULL);                 /* use default compile context */


/* Compilation failed: print the error message and exit. */
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,buffer);
return 1;
}




match_data = pcre2_match_data_create_from_pattern(re, NULL);


rc = pcre2_match(
re,
subject,              /* the subject string */
subject_length,       /* the length of the subject */
0,                    /* start at offset 0 in the subject */
0,                    /* default options */
match_data,           /* block for storing the result */
NULL);


if (rc < 0)
{
switch(rc)
{
case PCRE2_ERROR_NOMATCH: //printf("No match\n"); //
pcre2_match_data_free(match_data);
pcre2_code_free(re);
Found = 0;
return Found;
//  break;
/*
Handle other special cases if you like
*/
default: printf("Matching error %d\n", rc); //break;
}
pcre2_match_data_free(match_data);   /* Release memory used for the match */
pcre2_code_free(re);
Found = 0;                /* data and the compiled pattern. */
return Found;
}




if (Debug){
ovector = pcre2_get_ovector_pointer(match_data);
printf("Match succeeded at offset %d\n", (int)ovector[0]);


if (rc == 0)
printf("ovector was not big enough for all the captured substrings\n");




if (ovector[0] > ovector[1])
{
printf("\\K was used in an assertion to set the match start after its end.\n"
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
(char *)(subject + ovector[1]));
printf("Run abandoned\n");
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return 0;
}


for (i = 0; i < rc; i++)
{
PCRE2_SPTR substring_start = subject + ovector[2*i];
size_t substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
}
}


else{
if(rc > 0){
Found = true;


}
}
pcre2_match_data_free(match_data);
pcre2_code_free(re);
return Found;


}

使用以下方法安装PCRE2:

# Download, unpack, configure, build and install PCRE2 10.31.
# NOTE(review): confirm this download URL is still reachable before relying on it.
wget https://ftp.pcre.org/pub/pcre/pcre2-10.31.zip
# The archive must be unpacked and configured before make can run.
unzip pcre2-10.31.zip
cd pcre2-10.31
./configure
make
sudo make install
# Refresh the shared-library cache so the newly installed library is found.
sudo ldconfig

编译使用:

gcc foo.c -lpcre2-8 -o foo

查看我的回答了解更多细节。

这是一个使用REG_EXTENDED的例子。 这个正则表达式(注意小数点必须转义为\\.,否则.会匹配任意字符)

"^(-)?([0-9]+)((,|\\.)([0-9]+))?\n$"

可以匹配西班牙语格式(以逗号作小数点)和国际格式(以点作小数点)的十进制数。:)

#include <regex.h>
#include <stdlib.h>
#include <stdio.h>
/* Compiled pattern; main fills it with regcomp() and releases it with regfree(). */
regex_t regex;
/* Return code of the most recent regcomp()/regexec() call made in main. */
int reti;
/* 100-byte buffer that main fills with one line read from stdin via fgets(). */
char msgbuf[100];


/*
 * Read lines from stdin until end of input and report, for each line,
 * whether it is a decimal number: an optional leading '-', one or more
 * digits, and an optional fractional part introduced by ',' or '.'.
 * The pattern expects the trailing newline that fgets() keeps.
 *
 * Each line is echoed, followed by "Match" or "No match". The program
 * exits with status 1 if the pattern cannot be compiled or regexec()
 * reports an error; it returns 0 when input is exhausted.
 */
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    /* Compile once, before the loop: the pattern never changes. The dot is
       escaped so that only a literal '.' is accepted as decimal separator. */
    reti = regcomp(&regex, "^(-)?([0-9]+)((,|\\.)([0-9]+))?\n$", REG_EXTENDED);
    if (reti) {
        fprintf(stderr, "Could not compile regex\n");
        exit(1);
    }

    /* fgets() returns NULL at end of input or on a read error; stop then
       instead of re-testing the stale buffer forever. */
    while (fgets(msgbuf, sizeof(msgbuf), stdin) != NULL) {
        /* Execute regular expression */
        printf("%s\n", msgbuf);
        reti = regexec(&regex, msgbuf, 0, NULL, 0);
        if (!reti) {
            puts("Match");
        }
        else if (reti == REG_NOMATCH) {
            puts("No match");
        }
        else {
            /* The input line is no longer needed, so msgbuf is reused
               for the error text. */
            regerror(reti, &regex, msgbuf, sizeof(msgbuf));
            fprintf(stderr, "Regex match failed: %s\n", msgbuf);
            regfree(&regex);
            exit(1);
        }
    }

    /* Free memory allocated to the pattern buffer by regcomp() */
    regfree(&regex);
    return 0;
}