Skip to content

Commit 880b9be

Browse files
committed
Fixed CI issue related to needs upgrade checks
1 parent 64dc8ce commit 880b9be

File tree

12 files changed

+875
-288
lines changed

12 files changed

+875
-288
lines changed

src/EventLogExpert.EventDbTool/CreateDatabaseCommand.cs

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using EventLogExpert.Eventing.Providers;
77
using Microsoft.Extensions.DependencyInjection;
88
using System.CommandLine;
9+
using System.Text.RegularExpressions;
910

1011
namespace EventLogExpert.EventDbTool;
1112

@@ -83,50 +84,94 @@ private void CreateDatabase(string path, string? source, string? filter, string?
8384
return;
8485
}
8586

86-
if (source is not null && !ProviderSource.TryValidate(source, Logger)) { return; }
87+
if (!RegexHelper.TryCreate(filter, Logger, out var regex)) { return; }
8788

88-
HashSet<string> skipProviderNames = new(StringComparer.OrdinalIgnoreCase);
89+
if (source is not null && !ProviderSource.TryValidate(source, Logger)) { return; }
8990

90-
if (!string.IsNullOrWhiteSpace(skipProvidersInFile))
91+
try
9192
{
92-
if (!ProviderSource.TryValidate(skipProvidersInFile, Logger)) { return; }
93+
HashSet<string> skipProviderNames = new(StringComparer.OrdinalIgnoreCase);
9394

94-
foreach (var name in ProviderSource.LoadProviderNames(skipProvidersInFile, Logger))
95+
if (!string.IsNullOrWhiteSpace(skipProvidersInFile))
9596
{
96-
skipProviderNames.Add(name);
97+
if (!ProviderSource.TryValidate(skipProvidersInFile, Logger)) { return; }
98+
99+
foreach (var name in ProviderSource.LoadProviderNames(skipProvidersInFile, Logger))
100+
{
101+
skipProviderNames.Add(name);
102+
}
103+
104+
Logger.Info($"Found {skipProviderNames.Count} providers in {skipProvidersInFile}. These will not be included in the new database.");
97105
}
98106

99-
Logger.Info($"Found {skipProviderNames.Count} providers in {skipProvidersInFile}. These will not be included in the new database.");
100-
}
107+
// Load provider names first (cheap string-only query) for the empty check and header
108+
// formatting. This avoids materializing all ProviderDetails (with large compressed
109+
// payloads) just to compute the column widths.
110+
IReadOnlyList<string> providerNames = source is null
111+
? GetLocalProviderNames(regex)
112+
.Where(n => !skipProviderNames.Contains(n)).ToList()
113+
: ProviderSource.LoadProviderNames(source, Logger, regex)
114+
.Where(n => !skipProviderNames.Contains(n)).ToList();
101115

102-
IEnumerable<ProviderDetails> providersToAdd = source is null
103-
? LoadLocalProviders(filter, skipProviderNames)
104-
: ProviderSource.LoadProviders(source, Logger, filter, skipProviderNames);
116+
if (providerNames.Count == 0)
117+
{
118+
Logger.Warn($"No providers to add to the new database.");
119+
return;
120+
}
105121

106-
var providersNotSkipped = providersToAdd.ToList();
122+
LogProviderDetailHeader(providerNames);
107123

108-
if (providersNotSkipped.Count == 0)
109-
{
110-
Logger.Warn($"No providers to add to the new database.");
111-
return;
112-
}
124+
// Defer creating the DbContext (and therefore the .db file on disk) until we have
125+
// at least one provider to persist. This prevents leaving an empty database behind
126+
// when no provider details could be resolved (e.g., .evtx without LocaleMetaData).
127+
EventProviderDbContext? dbContext = null;
128+
129+
try
130+
{
131+
// Stream details directly into the DbContext. Batch saves prevent the change tracker
132+
// from accumulating all entities in memory at once.
133+
const int batchSize = 100;
134+
var count = 0;
113135

114-
using var dbContext = new EventProviderDbContext(path, false, Logger);
136+
IEnumerable<ProviderDetails> providersToAdd = source is null
137+
? LoadLocalProviders(regex, skipProviderNames)
138+
: ProviderSource.LoadProviders(source, Logger, regex, skipProviderNames);
115139

116-
LogProviderDetailHeader(providersNotSkipped.Select(p => p.ProviderName));
140+
foreach (var details in providersToAdd)
141+
{
142+
dbContext ??= new EventProviderDbContext(path, false, Logger);
143+
dbContext.ProviderDetails.Add(details);
144+
LogProviderDetails(details);
145+
count++;
117146

118-
foreach (var details in providersNotSkipped)
119-
{
120-
dbContext.ProviderDetails.Add(details);
121-
LogProviderDetails(details);
122-
}
147+
if (count % batchSize != 0) { continue; }
148+
dbContext.SaveChanges();
149+
dbContext.ChangeTracker.Clear();
150+
}
123151

124-
Logger.Info($"");
125-
Logger.Info($"Saving database. Please wait...");
152+
if (dbContext is null)
153+
{
154+
Logger.Warn($"No provider details could be resolved from the source. Database was not created.");
126155

127-
dbContext.SaveChanges();
156+
return;
157+
}
128158

129-
Logger.Info($"Done!");
159+
Logger.Info($"");
160+
Logger.Info($"Saving database. Please wait...");
161+
162+
dbContext.SaveChanges();
163+
164+
Logger.Info($"Done!");
165+
}
166+
finally
167+
{
168+
dbContext?.Dispose();
169+
}
170+
}
171+
catch (RegexMatchTimeoutException)
172+
{
173+
Logger.Error($"The --filter regex timed out. The pattern may cause catastrophic backtracking.");
174+
}
130175
}
131176

132177
}

src/EventLogExpert.EventDbTool/DbToolCommand.cs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,29 @@ public class DbToolCommand(ITraceLogger logger)
1414

1515
protected ITraceLogger Logger => logger;
1616

17-
protected static List<string> GetLocalProviderNames(string? filter)
17+
protected static List<string> GetLocalProviderNames(string? filter, ITraceLogger logger) =>
18+
!RegexHelper.TryCreate(filter, logger, out var regex) ? [] : GetLocalProviderNames(regex);
19+
20+
protected static List<string> GetLocalProviderNames(Regex? regex)
1821
{
1922
var providers = new List<string>(EventLogSession.GlobalSession.GetProviderNames().Distinct().OrderBy(name => name));
2023

21-
if (string.IsNullOrEmpty(filter)) { return providers; }
24+
return regex is null ? providers : providers.Where(p => regex.IsMatch(p)).ToList();
25+
}
2226

23-
var regex = new Regex(filter, RegexOptions.IgnoreCase);
24-
providers = providers.Where(p => regex.IsMatch(p)).ToList();
27+
protected IEnumerable<ProviderDetails> LoadLocalProviders(string? filter, IReadOnlySet<string>? skipProviderNames = null)
28+
{
29+
if (!RegexHelper.TryCreate(filter, Logger, out var regex)) { yield break; }
2530

26-
return providers;
31+
foreach (var details in LoadLocalProviders(regex, skipProviderNames))
32+
{
33+
yield return details;
34+
}
2735
}
2836

29-
protected IEnumerable<ProviderDetails> LoadLocalProviders(string? filter, IReadOnlySet<string>? skipProviderNames = null)
37+
protected IEnumerable<ProviderDetails> LoadLocalProviders(Regex? regex, IReadOnlySet<string>? skipProviderNames = null)
3038
{
31-
foreach (var providerName in GetLocalProviderNames(filter))
39+
foreach (var providerName in GetLocalProviderNames(regex))
3240
{
3341
// Skip BEFORE resolving so we don't pay the cost of loading metadata for providers we
3442
// are about to discard (e.g. when --skip-providers-in-file lists most local providers).

src/EventLogExpert.EventDbTool/DiffDatabaseCommand.cs

Lines changed: 42 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -79,43 +79,58 @@ private void DiffDatabase(string firstSource, string secondSource, string newDb)
7979

8080
var providersCopied = new List<ProviderDetails>();
8181

82-
using var newDbContext = new EventProviderDbContext(newDb, false, Logger);
82+
// Pass firstProviderNames as the skip set so providers present in the first source are
83+
// never resolved from the second source's metadata path. This is especially important when
84+
// the second source is .evtx+MTA, where each provider triggers an expensive load.
85+
Logger.Info($"Skipping up to {firstProviderNames.Count} provider name(s) from the second source that also appear in the first source.");
8386

84-
foreach (var details in ProviderSource.LoadProviders(secondSource, Logger))
87+
// Defer creating the DbContext (and therefore the .db file on disk) until at least one
88+
// provider is actually about to be persisted. This prevents leaving an empty database
89+
// behind when the second source yields no new providers.
90+
EventProviderDbContext? newDbContext = null;
91+
92+
try
8593
{
86-
if (firstProviderNames.Contains(details.ProviderName))
94+
foreach (var details in ProviderSource.LoadProviders(secondSource, Logger, filter: null, skipProviderNames: firstProviderNames))
8795
{
88-
Logger.Info($"Skipping {details.ProviderName} because it is present in both sources.");
89-
continue;
96+
Logger.Info($"Copying {details.ProviderName} because it is present in second source but not first.");
97+
98+
newDbContext ??= new EventProviderDbContext(newDb, false, Logger);
99+
100+
newDbContext.ProviderDetails.Add(new ProviderDetails
101+
{
102+
ProviderName = details.ProviderName,
103+
Events = details.Events,
104+
Parameters = details.Parameters,
105+
Keywords = details.Keywords,
106+
Messages = details.Messages,
107+
Opcodes = details.Opcodes,
108+
Tasks = details.Tasks
109+
});
110+
111+
providersCopied.Add(details);
90112
}
91113

92-
Logger.Info($"Copying {details.ProviderName} because it is present in second source but not first.");
93-
94-
newDbContext.ProviderDetails.Add(new ProviderDetails
114+
if (newDbContext is null)
95115
{
96-
ProviderName = details.ProviderName,
97-
Events = details.Events,
98-
Parameters = details.Parameters,
99-
Keywords = details.Keywords,
100-
Messages = details.Messages,
101-
Opcodes = details.Opcodes,
102-
Tasks = details.Tasks
103-
});
104-
105-
providersCopied.Add(details);
106-
}
107-
108-
newDbContext.SaveChanges();
116+
Logger.Warn($"No providers in the second source are missing from the first. Database was not created.");
117+
return;
118+
}
109119

110-
if (providersCopied.Count <= 0) { return; }
120+
newDbContext.SaveChanges();
111121

112-
Logger.Info($"Providers copied to new database:");
113-
Logger.Info($"");
114-
LogProviderDetailHeader(providersCopied.Select(p => p.ProviderName));
122+
Logger.Info($"Providers copied to new database:");
123+
Logger.Info($"");
124+
LogProviderDetailHeader(providersCopied.Select(p => p.ProviderName));
115125

116-
foreach (var provider in providersCopied)
126+
foreach (var provider in providersCopied)
127+
{
128+
LogProviderDetails(provider);
129+
}
130+
}
131+
finally
117132
{
118-
LogProviderDetails(provider);
133+
newDbContext?.Dispose();
119134
}
120135
}
121136
}

src/EventLogExpert.EventDbTool/MergeDatabaseCommand.cs

Lines changed: 58 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -67,33 +67,42 @@ private void MergeDatabase(string source, string targetFile, bool overwriteProvi
6767
return;
6868
}
6969

70-
var sourceProviders = ProviderSource.LoadProviders(source, Logger).ToList();
70+
// Load only the cheap projection of source provider names first. This avoids resolving
71+
// (and for .evtx+MTA sources, expensively materializing) provider details that will be
72+
// skipped because they already exist in the target.
73+
var sourceNames = new HashSet<string>(ProviderSource.LoadProviderNames(source, Logger), StringComparer.OrdinalIgnoreCase);
7174

72-
if (sourceProviders.Count == 0)
75+
if (sourceNames.Count == 0)
7376
{
7477
Logger.Warn($"No providers were discovered in the source.");
7578
return;
7679
}
7780

7881
using var targetContext = new EventProviderDbContext(targetFile, false, Logger);
7982

80-
// Pre-load all target ProviderName values once (cheap projection), then compute the case-
81-
// insensitive overlap with the source in-memory. This replaces a previous N+1 query that
82-
// issued one Where(...).ToList() per source provider against the target table.
83-
var sourceNames = new HashSet<string>(
84-
sourceProviders.Select(p => p.ProviderName),
85-
StringComparer.OrdinalIgnoreCase);
83+
// Query the overlap in the database by chunking sourceNames into IN-clause batches,
84+
// rather than pulling every target ProviderName into memory. Same chunk size as the
85+
// delete loop below to stay below SQLite's bound-parameter limit (default 999 before SQLite 3.32.0, 32766 since).
86+
var sourceNamesList = sourceNames.ToList();
87+
var targetMatchingNames = new List<string>();
8688

87-
var targetMatchingNames = targetContext.ProviderDetails
88-
.AsNoTracking()
89-
.Select(p => p.ProviderName)
90-
.ToList()
91-
.Where(n => sourceNames.Contains(n))
92-
.ToList();
89+
for (var offset = 0; offset < sourceNamesList.Count; offset += ProviderSource.MaxInClauseParameters)
90+
{
91+
var chunk = sourceNamesList
92+
.Skip(offset)
93+
.Take(ProviderSource.MaxInClauseParameters)
94+
.ToList();
95+
96+
targetMatchingNames.AddRange(
97+
targetContext.ProviderDetails
98+
.AsNoTracking()
99+
.Where(p => chunk.Contains(p.ProviderName))
100+
.Select(p => p.ProviderName));
101+
}
93102

94103
// Track the source-side provider names whose case-insensitive equivalent exists in target.
95104
// ProviderName is the primary key in the target DB, so case-sensitive uniqueness identifies
96-
// a row; the case-insensitive HashSet drives the no-overwrite skip check on the source side.
105+
// a row; the case-insensitive HashSet drives the no-overwrite skip on the source side.
97106
var providerNamesInTarget = new HashSet<string>(targetMatchingNames, StringComparer.OrdinalIgnoreCase);
98107

99108
if (targetMatchingNames.Count > 0)
@@ -104,15 +113,24 @@ private void MergeDatabase(string source, string targetFile, bool overwriteProvi
104113
{
105114
Logger.Info($"Removing these providers from the target database...");
106115

107-
// Single round-trip to load just the rows we need to remove. Since these names came
108-
// from the same DB, exact (binary) matching here is correct.
109-
var toRemove = targetContext.ProviderDetails
110-
.Where(p => targetMatchingNames.Contains(p.ProviderName))
111-
.ToList();
116+
// Chunk the IN-clause to stay below SQLite's parameter limit (default 999 before SQLite 3.32.0). Without
117+
// chunking, an --overwrite of a large overlap could throw at runtime.
118+
// ExecuteDelete() issues a SQL DELETE directly, avoiding change-tracker overhead.
119+
var removed = 0;
120+
121+
for (var offset = 0; offset < targetMatchingNames.Count; offset += ProviderSource.MaxInClauseParameters)
122+
{
123+
var chunk = targetMatchingNames
124+
.Skip(offset)
125+
.Take(ProviderSource.MaxInClauseParameters)
126+
.ToList();
112127

113-
targetContext.RemoveRange(toRemove);
114-
targetContext.SaveChanges();
115-
Logger.Info($"Removal of {toRemove.Count} provider row(s) completed.");
128+
removed += targetContext.ProviderDetails
129+
.Where(p => chunk.Contains(p.ProviderName))
130+
.ExecuteDelete();
131+
}
132+
133+
Logger.Info($"Removal of {removed} provider row(s) completed.");
116134
}
117135
else
118136
{
@@ -122,16 +140,20 @@ private void MergeDatabase(string source, string targetFile, bool overwriteProvi
122140

123141
Logger.Info($"Copying providers from the source...");
124142

143+
// When not overwriting, pass the overlap as the skip set so providers that already exist
144+
// in the target are never resolved from the source's metadata path. When overwriting, no
145+
// skip set is passed so all source providers are loaded and re-inserted.
146+
var skipForLoad = overwriteProviders ? null : providerNamesInTarget;
147+
125148
var providersCopied = new List<ProviderDetails>();
126149

127-
foreach (var provider in sourceProviders)
128-
{
129-
if (providerNamesInTarget.Contains(provider.ProviderName) && !overwriteProviders)
130-
{
131-
Logger.Info($"Skipping provider: {provider.ProviderName}");
132-
continue;
133-
}
150+
// Stream details into the DbContext with periodic SaveChanges so the change tracker does
151+
// not accumulate every entity for very large source sets.
152+
const int batchSize = 100;
153+
var count = 0;
134154

155+
foreach (var provider in ProviderSource.LoadProviders(source, Logger, filter: null, skipProviderNames: skipForLoad))
156+
{
135157
targetContext.ProviderDetails.Add(new ProviderDetails
136158
{
137159
ProviderName = provider.ProviderName,
@@ -144,6 +166,12 @@ private void MergeDatabase(string source, string targetFile, bool overwriteProvi
144166
});
145167

146168
providersCopied.Add(provider);
169+
count++;
170+
171+
if (count % batchSize != 0) { continue; }
172+
173+
targetContext.SaveChanges();
174+
targetContext.ChangeTracker.Clear();
147175
}
148176

149177
targetContext.SaveChanges();

0 commit comments

Comments
 (0)