| 88 |
|
|
| 89 |
/* Line ending types */ |
/* Line ending types */ |
| 90 |
|
|
| 91 |
enum { EL_LF, EL_CR, EL_CRLF, EL_ANY }; |
enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF }; |
| 92 |
|
|
| 93 |
|
|
| 94 |
|
|
| 196 |
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, |
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, |
| 197 |
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, |
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, |
| 198 |
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, |
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, |
| 199 |
{ OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" }, |
{ OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, |
| 200 |
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, |
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, |
| 201 |
{ OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" }, |
{ OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" }, |
| 202 |
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, |
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, |
| 226 |
static const char *suffix[] = { |
static const char *suffix[] = { |
| 227 |
"", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" }; |
"", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" }; |
| 228 |
|
|
| 229 |
/* UTF-8 tables - used only when the newline setting is "all". */ |
/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */ |
| 230 |
|
|
| 231 |
const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
| 232 |
|
|
| 545 |
} |
} |
| 546 |
break; |
break; |
| 547 |
|
|
| 548 |
|
case EL_ANYCRLF: |
| 549 |
|
while (p < endptr) |
| 550 |
|
{ |
| 551 |
|
int extra = 0; |
| 552 |
|
register int c = *((unsigned char *)p); |
| 553 |
|
|
| 554 |
|
if (utf8 && c >= 0xc0) |
| 555 |
|
{ |
| 556 |
|
int gcii, gcss; |
| 557 |
|
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
| 558 |
|
gcss = 6*extra; |
| 559 |
|
c = (c & utf8_table3[extra]) << gcss; |
| 560 |
|
for (gcii = 1; gcii <= extra; gcii++) |
| 561 |
|
{ |
| 562 |
|
gcss -= 6; |
| 563 |
|
c |= (p[gcii] & 0x3f) << gcss; |
| 564 |
|
} |
| 565 |
|
} |
| 566 |
|
|
| 567 |
|
p += 1 + extra; |
| 568 |
|
|
| 569 |
|
switch (c) |
| 570 |
|
{ |
| 571 |
|
case 0x0a: /* LF */ |
| 572 |
|
*lenptr = 1; |
| 573 |
|
return p; |
| 574 |
|
|
| 575 |
|
case 0x0d: /* CR */ |
| 576 |
|
if (p < endptr && *p == 0x0a) |
| 577 |
|
{ |
| 578 |
|
*lenptr = 2; |
| 579 |
|
p++; |
| 580 |
|
} |
| 581 |
|
else *lenptr = 1; |
| 582 |
|
return p; |
| 583 |
|
|
| 584 |
|
default: |
| 585 |
|
break; |
| 586 |
|
} |
| 587 |
|
} /* End of loop for ANYCRLF case */ |
| 588 |
|
|
| 589 |
|
*lenptr = 0; /* Must have hit the end */ |
| 590 |
|
return endptr; |
| 591 |
|
|
| 592 |
case EL_ANY: |
case EL_ANY: |
| 593 |
while (p < endptr) |
while (p < endptr) |
| 594 |
{ |
{ |
| 687 |
return p; /* But control should never get here */ |
return p; /* But control should never get here */ |
| 688 |
|
|
| 689 |
case EL_ANY: |
case EL_ANY: |
| 690 |
|
case EL_ANYCRLF: |
| 691 |
if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; |
if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; |
| 692 |
if (utf8) while ((*p & 0xc0) == 0x80) p--; |
if (utf8) while ((*p & 0xc0) == 0x80) p--; |
| 693 |
|
|
| 716 |
} |
} |
| 717 |
else c = *((unsigned char *)pp); |
else c = *((unsigned char *)pp); |
| 718 |
|
|
| 719 |
switch (c) |
if (endlinetype == EL_ANYCRLF) switch (c) |
| 720 |
|
{ |
| 721 |
|
case 0x0a: /* LF */ |
| 722 |
|
case 0x0d: /* CR */ |
| 723 |
|
return p; |
| 724 |
|
|
| 725 |
|
default: |
| 726 |
|
break; |
| 727 |
|
} |
| 728 |
|
|
| 729 |
|
else switch (c) |
| 730 |
{ |
{ |
| 731 |
case 0x0a: /* LF */ |
case 0x0a: /* LF */ |
| 732 |
case 0x0b: /* VT */ |
case 0x0b: /* VT */ |
| 1567 |
case '\r': newline = (char *)"cr"; break; |
case '\r': newline = (char *)"cr"; break; |
| 1568 |
case ('\r' << 8) | '\n': newline = (char *)"crlf"; break; |
case ('\r' << 8) | '\n': newline = (char *)"crlf"; break; |
| 1569 |
case -1: newline = (char *)"any"; break; |
case -1: newline = (char *)"any"; break; |
| 1570 |
|
case -2: newline = (char *)"anycrlf"; break; |
| 1571 |
} |
} |
| 1572 |
|
|
| 1573 |
/* Process the options */ |
/* Process the options */ |
| 1875 |
pcre_options |= PCRE_NEWLINE_ANY; |
pcre_options |= PCRE_NEWLINE_ANY; |
| 1876 |
endlinetype = EL_ANY; |
endlinetype = EL_ANY; |
| 1877 |
} |
} |
| 1878 |
|
else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0) |
| 1879 |
|
{ |
| 1880 |
|
pcre_options |= PCRE_NEWLINE_ANYCRLF; |
| 1881 |
|
endlinetype = EL_ANYCRLF; |
| 1882 |
|
} |
| 1883 |
else |
else |
| 1884 |
{ |
{ |
| 1885 |
fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); |
fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); |