diff --git a/OpenCand.ETL/Parser/CsvServices/CsvParserService.cs b/OpenCand.ETL/Parser/CsvServices/CsvParserService.cs index 0118a62..b8a652c 100644 --- a/OpenCand.ETL/Parser/CsvServices/CsvParserService.cs +++ b/OpenCand.ETL/Parser/CsvServices/CsvParserService.cs @@ -1,6 +1,7 @@ using System.Globalization; using CsvHelper; using CsvHelper.Configuration; +using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; using OpenCand.ETL.Contracts; @@ -11,9 +12,12 @@ namespace OpenCand.Parser.Services private readonly ILogger> logger; private readonly CsvFixerService csvFixerService; private readonly IParserService parserService; + private readonly IConfiguration configuration; private readonly CsvConfiguration parserConfig; + private readonly int MaxDegreeOfParallelism; + // Progress tracking fields private long processedCount; private long totalCount; @@ -24,11 +28,26 @@ namespace OpenCand.Parser.Services public CsvParserService( ILogger> logger, IParserService parserService, - CsvFixerService csvFixerService) + CsvFixerService csvFixerService, + IConfiguration configuration) { this.logger = logger; this.csvFixerService = csvFixerService; this.parserService = parserService; + this.configuration = configuration; + + var defaultThreadCount = configuration.GetValue("ParserSettings:DefaultThreads", 25); + + if (configuration.GetValue($"ParserSettings:{typeof(CsvObj).Name}Threads") == null) + { + logger.LogInformation($"ParserSettings:{typeof(CsvObj).Name}Threads not found in configuration, using default value of {defaultThreadCount}."); + MaxDegreeOfParallelism = configuration.GetValue("ParserSettings:DefaultThreads", defaultThreadCount); + } + else + { + MaxDegreeOfParallelism = configuration.GetValue($"ParserSettings:{typeof(CsvObj).Name}Threads", defaultThreadCount); + logger.LogInformation($"Using {MaxDegreeOfParallelism} threads for parsing {typeof(CsvObj).Name} based on configuration."); + } parserConfig = new CsvConfiguration(CultureInfo.InvariantCulture) { @@ -60,7 +79,7 @@ namespace OpenCand.Parser.Services using var csv = new CsvReader(reader, parserConfig); var po = new ParallelOptions { - MaxDegreeOfParallelism = 40 + MaxDegreeOfParallelism = MaxDegreeOfParallelism }; //csv.Context.RegisterClassMap>(); // optional for advanced mapping, not needed diff --git a/OpenCand.ETL/Parser/ParserManager.cs b/OpenCand.ETL/Parser/ParserManager.cs index 1b20fc7..731682c 100644 --- a/OpenCand.ETL/Parser/ParserManager.cs +++ b/OpenCand.ETL/Parser/ParserManager.cs @@ -50,7 +50,7 @@ namespace OpenCand.Parser var bensCandidatosDirectory = Path.Combine(BasePath, csvSettings.BensCandidatosFolder); var redesSociaisDirectory = Path.Combine(BasePath, csvSettings.RedesSociaisFolder); - //await ParseFolder(candidatosDirectory, candidatoParserService); + await ParseFolder(candidatosDirectory, candidatoParserService); await ParseFolder(bensCandidatosDirectory, bemCandidatoParserService); await ParseFolder(redesSociaisDirectory, redeSocialParserService); diff --git a/OpenCand.ETL/appsettings.json b/OpenCand.ETL/appsettings.json index 9874775..0a20f41 100644 --- a/OpenCand.ETL/appsettings.json +++ b/OpenCand.ETL/appsettings.json @@ -14,5 +14,9 @@ "BensCandidatosFolder": "data/bem_candidato", "RedesSociaisFolder": "data/rede_social" }, + "ParserSettings": { + "DefaultThreads": 40, + "CandidatoCSVThreads": 5 + }, "BasePath": "sample" }