Skip to content

Commit dc2c26f

Browse files
authored
Use PCRE library for regex (#76)
1 parent 1923625 commit dc2c26f

File tree

5 files changed

+45
-46
lines changed

5 files changed

+45
-46
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Cross platform .NET grok implementation as a NuGet package
77
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/Marusyk/grok.net/blob/main/LICENSE)
88
[![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/Marusyk/grok.net/blob/main/CONTRIBUTING.md)
99

10-
[![NuGet version](https://badge.fury.io/nu/grok.net.svg)](https://badge.fury.io/nu/grok.net)
10+
[![NuGet version](https://img.shields.io/nuget/v/grok.net.svg?logo=NuGet)](https://www.nuget.org/packages/grok.net)
1111
[![Nuget](https://img.shields.io/nuget/dt/grok.net.svg)](https://www.nuget.org/packages/Grok.Net)
1212
[![PowerShell Gallery Version](https://img.shields.io/powershellgallery/v/Grok)](https://www.powershellgallery.com/packages/Grok)
1313
[![PowerShell Gallery](https://img.shields.io/powershellgallery/dt/Grok)](https://www.powershellgallery.com/packages/Grok)

benchmark/ParseBenchmark.cs

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,55 +11,53 @@ public class ParseBenchmark
1111
private static readonly Grok _grokCustom = new("%{ZIPCODE:zipcode}:%{EMAILADDRESS:email}");
1212

1313
[Benchmark]
14-
public void Empty()
14+
public GrokResult Empty()
1515
{
16-
_ = _grokEmpty.Parse("");
16+
return _grokEmpty.Parse("");
1717
}
1818

1919
[Benchmark]
20-
public void Custom()
20+
public GrokResult Custom()
2121
{
22-
_ = _grokCustom.Parse("06590:[email protected]");
22+
return _grokCustom.Parse("06590:[email protected]");
2323
}
2424

2525
[Benchmark]
26-
public void Log()
26+
public GrokResult Log()
2727
{
28-
_ = _grokLog.Parse(@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
28+
return _grokLog.Parse(@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
2929
06-21-19 21:00:13:589265;156;WARN;main;DECODED: 775233900043 EMPTY DISTANCE: --------");
3030
}
3131

32-
[Params("DBG", "INF", "WARN", "ERR")]
33-
public string LogLevel { get; set; }
34-
3532
[Benchmark]
36-
public void LogWithParam()
33+
public bool LogWithParam()
3734
{
35+
const string logLevel = "INF";
3836
GrokResult grokResult = _grokLog.Parse($@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
39-
06-21-19 21:00:13:589265;156;{LogLevel};main;DECODED: 775233900043 EMPTY DISTANCE: --------");
37+
06-21-19 21:00:13:589265;156;{logLevel};main;DECODED: 775233900043 EMPTY DISTANCE: --------");
4038

41-
bool resut = (string)grokResult[0].Value == LogLevel;
39+
return (string)grokResult[0].Value == logLevel;
4240
}
4341

4442
[Benchmark]
45-
public void EmptyLocal()
43+
public GrokResult EmptyLocal()
4644
{
4745
Grok grokEmptyLocal = new Grok("");
48-
_ = grokEmptyLocal.Parse("");
46+
return grokEmptyLocal.Parse("");
4947
}
5048

5149
[Benchmark]
52-
public void CustomLocal()
50+
public GrokResult CustomLocal()
5351
{
5452
Grok grokCustomLocal = new Grok("%{ZIPCODE:zipcode}:%{EMAILADDRESS:email}");
55-
_ = grokCustomLocal.Parse("06590:[email protected]");
53+
return grokCustomLocal.Parse("06590:[email protected]");
5654
}
5755

5856
[Benchmark]
59-
public void LogLocal()
57+
public GrokResult LogLocal()
6058
{
6159
Grok grokLogLocal = new Grok("%{MONTHDAY:month}-%{MONTHDAY:day}-%{MONTHDAY:year} %{TIME:timestamp};%{WORD:id};%{LOGLEVEL:loglevel};%{WORD:func};%{GREEDYDATA:msg}");
62-
_ = grokLogLocal.Parse(@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
60+
return grokLogLocal.Parse(@"06-21-19 21:00:13:589241;15;INFO;main;DECODED: 775233900043 DECODED BY: 18500738 DISTANCE: 1.5165
6361
06-21-19 21:00:13:589265;156;WARN;main;DECODED: 775233900043 EMPTY DISTANCE: --------");
6462
}
6563
}

src/Grok.Net.Tests/UnitTests.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
using System.Collections.Generic;
33
using System.IO;
44
using System.Linq;
5-
using System.Text.RegularExpressions;
65
using GrokNet;
6+
using PCRE;
77
using Xunit;
88

99
namespace GrokNetTests
@@ -311,7 +311,7 @@ public void Parse_Multiline_String_As_A_Single_Line_With_Regex_Options_Specified
311311
// Arrange
312312
const string timeKeyword = "loggingTime";
313313
const string messageKeyword = "message";
314-
const RegexOptions options = RegexOptions.Singleline;
314+
const PcreOptions options = PcreOptions.Singleline;
315315

316316
var multilineGrok = new Grok($"%{{TIMESTAMP_ISO8601:{timeKeyword}}} %{{GREEDYDATA:{messageKeyword}}}", options);
317317

@@ -338,7 +338,7 @@ Second line
338338
public void Load_Custom_Patterns_From_Stream_And_Parse_With_Regex_Options_Specified()
339339
{
340340
// Arrange
341-
const RegexOptions options = RegexOptions.Singleline;
341+
const PcreOptions options = PcreOptions.Singleline;
342342
const string zipcode = "122001";
343343
const string email = "[email protected]";
344344

@@ -356,7 +356,7 @@ public void Load_Custom_Patterns_From_Stream_And_Parse_With_Regex_Options_Specif
356356
public void Load_Custom_Patterns_And_Parse_With_Regex_Options_Specified()
357357
{
358358
// Arrange
359-
const RegexOptions options = RegexOptions.Singleline;
359+
const PcreOptions options = PcreOptions.Singleline;
360360
const string zipcode = "122001";
361361
var customPatterns = new Dictionary<string, string>
362362
{

src/Grok.Net/Grok.Net.csproj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
</PropertyGroup>
2020

2121
<PropertyGroup>
22-
<Version>1.2.0</Version>
22+
<Version>2.0.0</Version>
2323
</PropertyGroup>
2424

2525
<ItemGroup>
@@ -32,6 +32,7 @@
3232

3333
<ItemGroup>
3434
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="All" />
35+
<PackageReference Include="PCRE.NET" Version="0.20.0" />
3536
</ItemGroup>
3637

3738
<ItemGroup>

src/Grok.Net/Grok.cs

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
using System.Linq;
55
using System.Reflection;
66
using System.Text;
7-
using System.Text.RegularExpressions;
7+
using PCRE;
88

99
namespace GrokNet
1010
{
@@ -13,14 +13,14 @@ public class Grok
1313
private readonly string _grokPattern;
1414
private readonly Dictionary<string, string> _patterns;
1515
private readonly Dictionary<string, string> _typeMaps;
16-
private Regex _compiledRegex;
16+
private PcreRegex _compiledRegex;
1717
private IReadOnlyList<string> _patternGroupNames;
18-
private const RegexOptions _defaultRegexOptions = RegexOptions.Compiled | RegexOptions.ExplicitCapture;
19-
private readonly RegexOptions _regexOptions;
18+
private const PcreOptions _defaultRegexOptions = PcreOptions.Compiled | PcreOptions.ExplicitCapture;
19+
private readonly PcreOptions _regexOptions;
2020

21-
private static readonly Regex _grokRegex = new Regex("%{(\\w+):(\\w+)(?::\\w+)?}", RegexOptions.Compiled);
22-
private static readonly Regex _grokRegexWithType = new Regex("%{(\\w+):(\\w+):(\\w+)?}", RegexOptions.Compiled);
23-
private static readonly Regex _grokWithoutName = new Regex("%{(\\w+)}", RegexOptions.Compiled);
21+
private static readonly PcreRegex _grokRegex = new PcreRegex("%{(\\w+):(\\w+)(?::\\w+)?}", PcreOptions.Compiled);
22+
private static readonly PcreRegex _grokRegexWithType = new PcreRegex("%{(\\w+):(\\w+):(\\w+)?}", PcreOptions.Compiled);
23+
private static readonly PcreRegex _grokWithoutName = new PcreRegex("%{(\\w+)}", PcreOptions.Compiled);
2424

2525
/// <summary>
2626
/// Initializes a new instance of the <see cref="Grok"/> class with the specified Grok pattern.
@@ -41,7 +41,7 @@ public Grok(string grokPattern)
4141
/// </summary>
4242
/// <param name="grokPattern">The Grok pattern to use.</param>
4343
/// <param name="regexOptions">Additional regex options.</param>
44-
public Grok(string grokPattern, RegexOptions regexOptions)
44+
public Grok(string grokPattern, PcreOptions regexOptions)
4545
: this(grokPattern)
4646
{
4747
_regexOptions = _defaultRegexOptions | regexOptions;
@@ -65,7 +65,7 @@ public Grok(string grokPattern, Stream customPatterns)
6565
/// <param name="grokPattern">The Grok pattern to use.</param>
6666
/// <param name="customPatterns">A stream containing custom patterns.</param>
6767
/// <param name="regexOptions">Additional regex options.</param>
68-
public Grok(string grokPattern, Stream customPatterns, RegexOptions regexOptions)
68+
public Grok(string grokPattern, Stream customPatterns, PcreOptions regexOptions)
6969
: this(grokPattern, regexOptions)
7070
{
7171
LoadCustomPatterns(customPatterns);
@@ -84,12 +84,12 @@ public Grok(string grokPattern, IDictionary<string, string> customPatterns)
8484

8585
/// <summary>
8686
/// Initializes a new instance of the <see cref="Grok"/> class with the specified Grok pattern,
87-
/// custom patterns if necessary, and custom <see cref="RegexOptions"/> .
87+
/// custom patterns if necessary, and custom <see cref="PcreOptions"/>.
8888
/// </summary>
8989
/// <param name="grokPattern">The Grok pattern to use.</param>
9090
/// <param name="customPatterns">Custom patterns to add.</param>
9191
/// <param name="regexOptions">Additional regex options.</param>
92-
public Grok(string grokPattern, IDictionary<string, string> customPatterns, RegexOptions regexOptions)
92+
public Grok(string grokPattern, IDictionary<string, string> customPatterns, PcreOptions regexOptions)
9393
: this(grokPattern, regexOptions)
9494
{
9595
LoadCustomPatterns(customPatterns);
@@ -109,7 +109,7 @@ public GrokResult Parse(string text)
109109

110110
var grokItems = new List<GrokItem>();
111111

112-
foreach (Match match in _compiledRegex.Matches(text))
112+
foreach (PcreMatch match in _compiledRegex.Matches(text))
113113
{
114114
foreach (string groupName in _patternGroupNames)
115115
{
@@ -157,14 +157,14 @@ private void ParsePattern()
157157
pattern = newPattern;
158158
} while (!done);
159159

160-
_compiledRegex = new Regex(pattern, _regexOptions);
161-
_patternGroupNames = _compiledRegex.GetGroupNames().ToList();
160+
_compiledRegex = new PcreRegex(pattern, _regexOptions);
161+
_patternGroupNames = _compiledRegex.PatternInfo.GroupNames.ToList();
162162
}
163163

164164
private void ProcessTypeMappings(ref string pattern)
165165
{
166-
MatchCollection matches = _grokRegexWithType.Matches(string.IsNullOrEmpty(pattern) ? _grokPattern : pattern);
167-
foreach (Match match in matches)
166+
IEnumerable<PcreMatch> matches = _grokRegexWithType.Matches(string.IsNullOrEmpty(pattern) ? _grokPattern : pattern);
167+
foreach (PcreMatch match in matches)
168168
{
169169
_typeMaps.Add(match.Groups[2].Value, match.Groups[3].Value);
170170
}
@@ -258,25 +258,25 @@ private static void EnsurePatternIsValid(string pattern)
258258
{
259259
try
260260
{
261-
_ = Regex.Match("", pattern);
261+
_ = PcreRegex.Match("", pattern);
262262
}
263263
catch (Exception e)
264264
{
265265
throw new FormatException($"Invalid regular expression {pattern}", e);
266266
}
267267
}
268268

269-
private string ReplaceWithName(Match match)
269+
private string ReplaceWithName(PcreMatch match)
270270
{
271-
Group group1 = match.Groups[2];
272-
Group group2 = match.Groups[1];
271+
PcreGroup group1 = match.Groups[2];
272+
PcreGroup group2 = match.Groups[1];
273273

274274
return _patterns.TryGetValue(group2.Value, out var str) ? $"(?<{group1}>{str})" : $"(?<{group1}>)";
275275
}
276276

277-
private string ReplaceWithoutName(Match match)
277+
private string ReplaceWithoutName(PcreMatch match)
278278
{
279-
Group group = match.Groups[1];
279+
PcreGroup group = match.Groups[1];
280280

281281
if (_patterns.TryGetValue(group.Value, out _))
282282
{

0 commit comments

Comments
 (0)