@@ -67,33 +67,42 @@ private void MergeDatabase(string source, string targetFile, bool overwriteProvi
6767 return ;
6868 }
6969
70- var sourceProviders = ProviderSource . LoadProviders ( source , Logger ) . ToList ( ) ;
70+ // Load only the cheap projection of source provider names first. This avoids resolving
71+ // (and for .evtx+MTA sources, expensively materializing) provider details that will be
72+ // skipped because they already exist in the target.
73+ var sourceNames = new HashSet < string > ( ProviderSource . LoadProviderNames ( source , Logger ) , StringComparer . OrdinalIgnoreCase ) ;
7174
72- if ( sourceProviders . Count == 0 )
75+ if ( sourceNames . Count == 0 )
7376 {
7477 Logger . Warn ( $ "No providers were discovered in the source.") ;
7578 return ;
7679 }
7780
7881 using var targetContext = new EventProviderDbContext ( targetFile , false , Logger ) ;
7982
80- // Pre-load all target ProviderName values once (cheap projection), then compute the case-
81- // insensitive overlap with the source in-memory. This replaces a previous N+1 query that
82- // issued one Where(...).ToList() per source provider against the target table.
83- var sourceNames = new HashSet < string > (
84- sourceProviders . Select ( p => p . ProviderName ) ,
85- StringComparer . OrdinalIgnoreCase ) ;
83+ // Query the overlap in the database by chunking sourceNames into IN-clause batches,
84+ // rather than pulling every target ProviderName into memory. Same chunk size as the
85+ // delete loop below to stay below SQLite's default parameter limit (999).
86+ var sourceNamesList = sourceNames . ToList ( ) ;
87+ var targetMatchingNames = new List < string > ( ) ;
8688
87- var targetMatchingNames = targetContext . ProviderDetails
88- . AsNoTracking ( )
89- . Select ( p => p . ProviderName )
90- . ToList ( )
91- . Where ( n => sourceNames . Contains ( n ) )
92- . ToList ( ) ;
89+ for ( var offset = 0 ; offset < sourceNamesList . Count ; offset += ProviderSource . MaxInClauseParameters )
90+ {
91+ var chunk = sourceNamesList
92+ . Skip ( offset )
93+ . Take ( ProviderSource . MaxInClauseParameters )
94+ . ToList ( ) ;
95+
96+ targetMatchingNames . AddRange (
97+ targetContext . ProviderDetails
98+ . AsNoTracking ( )
99+ . Where ( p => chunk . Contains ( p . ProviderName ) )
100+ . Select ( p => p . ProviderName ) ) ;
101+ }
93102
94103 // Track the source-side provider names whose case-insensitive equivalent exists in target.
95104 // ProviderName is the primary key in the target DB, so case-sensitive uniqueness identifies
96- // a row; the case-insensitive HashSet drives the no-overwrite skip check on the source side.
105+ // a row; the case-insensitive HashSet drives the no-overwrite skip on the source side.
97106 var providerNamesInTarget = new HashSet < string > ( targetMatchingNames , StringComparer . OrdinalIgnoreCase ) ;
98107
99108 if ( targetMatchingNames . Count > 0 )
@@ -104,15 +113,24 @@ private void MergeDatabase(string source, string targetFile, bool overwriteProvi
104113 {
105114 Logger . Info ( $ "Removing these providers from the target database...") ;
106115
107- // Single round-trip to load just the rows we need to remove. Since these names came
108- // from the same DB, exact (binary) matching here is correct.
109- var toRemove = targetContext . ProviderDetails
110- . Where ( p => targetMatchingNames . Contains ( p . ProviderName ) )
111- . ToList ( ) ;
116+ // Chunk the IN-clause to stay below SQLite's parameter limit (default 999). Without
117+ // chunking, an --overwrite of a large overlap could throw at runtime.
118+ // ExecuteDelete() issues a SQL DELETE directly, avoiding change-tracker overhead.
119+ var removed = 0 ;
120+
121+ for ( var offset = 0 ; offset < targetMatchingNames . Count ; offset += ProviderSource . MaxInClauseParameters )
122+ {
123+ var chunk = targetMatchingNames
124+ . Skip ( offset )
125+ . Take ( ProviderSource . MaxInClauseParameters )
126+ . ToList ( ) ;
112127
113- targetContext . RemoveRange ( toRemove ) ;
114- targetContext . SaveChanges ( ) ;
115- Logger . Info ( $ "Removal of { toRemove . Count } provider row(s) completed.") ;
128+ removed += targetContext . ProviderDetails
129+ . Where ( p => chunk . Contains ( p . ProviderName ) )
130+ . ExecuteDelete ( ) ;
131+ }
132+
133+ Logger . Info ( $ "Removal of { removed } provider row(s) completed.") ;
116134 }
117135 else
118136 {
@@ -122,16 +140,20 @@ private void MergeDatabase(string source, string targetFile, bool overwriteProvi
122140
123141 Logger . Info ( $ "Copying providers from the source...") ;
124142
143+ // When not overwriting, pass the overlap as the skip set so providers that already exist
144+ // in the target are never resolved from the source's metadata path. When overwriting, no
145+ // skip set is passed so all source providers are loaded and re-inserted.
146+ var skipForLoad = overwriteProviders ? null : providerNamesInTarget ;
147+
125148 var providersCopied = new List < ProviderDetails > ( ) ;
126149
127- foreach ( var provider in sourceProviders )
128- {
129- if ( providerNamesInTarget . Contains ( provider . ProviderName ) && ! overwriteProviders )
130- {
131- Logger . Info ( $ "Skipping provider: { provider . ProviderName } ") ;
132- continue ;
133- }
150+ // Stream details into the DbContext with periodic SaveChanges so the change tracker does
151+ // not accumulate every entity for very large source sets.
152+ const int batchSize = 100 ;
153+ var count = 0 ;
134154
155+ foreach ( var provider in ProviderSource . LoadProviders ( source , Logger , filter : null , skipProviderNames : skipForLoad ) )
156+ {
135157 targetContext . ProviderDetails . Add ( new ProviderDetails
136158 {
137159 ProviderName = provider . ProviderName ,
@@ -144,6 +166,12 @@ private void MergeDatabase(string source, string targetFile, bool overwriteProvi
144166 } ) ;
145167
146168 providersCopied . Add ( provider ) ;
169+ count ++ ;
170+
171+ if ( count % batchSize != 0 ) { continue ; }
172+
173+ targetContext . SaveChanges ( ) ;
174+ targetContext . ChangeTracker . Clear ( ) ;
147175 }
148176
149177 targetContext . SaveChanges ( ) ;
0 commit comments