stuff and refactor
All checks were successful
API and ETL Build / build_etl (push) Successful in 8s
API and ETL Build / build_api (push) Successful in 9s

This commit is contained in:
Jose Henrique 2025-06-03 16:27:39 -03:00
parent 03b1f4f1d1
commit 2660826a3f
13 changed files with 505 additions and 425 deletions

View File

@ -15,18 +15,22 @@ namespace OpenCand.Repository
using (var connection = new NpgsqlConnection(ConnectionString)) using (var connection = new NpgsqlConnection(ConnectionString))
{ {
return (await connection.QueryAsync<Candidato>(@" return (await connection.QueryAsync<Candidato>(@"
SELECT * SELECT *,
CASE CASE
WHEN lower(nome) = lower(@query) THEN 0 -- Exact match (case-insensitive) WHEN lower(apelido) = lower(@query) THEN 0 -- apelido Exact match (case-insensitive)
WHEN lower(nome) LIKE lower(@query) || '%' THEN 1 -- Starts with match (case-insensitive) WHEN lower(apelido) LIKE lower(@query) || '%' THEN 1 -- apelido Starts with match (case-insensitive)
WHEN lower(nome) LIKE '%' || lower(@query) THEN 2 -- Contains anywhere match (case-insensitive) WHEN lower(apelido) LIKE '%' || lower(@query) THEN 2 -- apelido Contains anywhere match (case-insensitive)
WHEN cpf = @query THEN 0 -- Exact match for CPF WHEN lower(nome) = lower(@query) THEN 0 -- nome Exact match (case-insensitive)
WHEN cpf LIKE @query || '%' THEN 1 -- Starts with match for CPF WHEN lower(nome) LIKE lower(@query) || '%' THEN 1 -- nome Starts with match (case-insensitive)
WHEN cpf LIKE '%' || @query THEN 2 -- Contains anywhere match for CPF WHEN lower(nome) LIKE '%' || lower(@query) THEN 2 -- nome Contains anywhere match (case-insensitive)
WHEN cpf = @query THEN 0 -- cpf Exact match for CPF
WHEN cpf LIKE @query || '%' THEN 1 -- cpf Starts with match for CPF
WHEN cpf LIKE '%' || @query THEN 2 -- cpf Contains anywhere match for CPF
ELSE 3 ELSE 3
END AS name_rank END AS name_rank
FROM candidato FROM candidato
WHERE nome ILIKE '%' || @query || '%' OR WHERE apelido ILIKE '%' || @query || '%' OR
nome ILIKE '%' || @query || '%' OR
cpf ILIKE '%' || @query || '%' cpf ILIKE '%' || @query || '%'
ORDER BY name_rank, ORDER BY name_rank,
length(nome) ASC length(nome) ASC

View File

@ -0,0 +1,7 @@
namespace OpenCand.ETL.Contracts
{
public interface IParserService<CsvObj>
{
Task ParseObject(CsvObj record);
}
}

View File

@ -1,15 +1,14 @@
using System.Text; using System.Text;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using OpenCand.Repository;
namespace OpenCand.Parser.Services namespace OpenCand.Parser.Services
{ {
public class CsvFixerService public class CsvFixerService
{ {
private readonly ILogger<CsvParserService> logger; private readonly ILogger<CsvFixerService> logger;
public CsvFixerService( public CsvFixerService(
ILogger<CsvParserService> logger) ILogger<CsvFixerService> logger)
{ {
this.logger = logger; this.logger = logger;
} }

View File

@ -0,0 +1,150 @@
using System.Globalization;
using CsvHelper;
using CsvHelper.Configuration;
using Microsoft.Extensions.Logging;
using OpenCand.ETL.Contracts;
namespace OpenCand.Parser.Services
{
public class CsvParserService<CsvObj> : IDisposable
{
private readonly ILogger<CsvParserService<CsvObj>> logger;
private readonly CsvFixerService csvFixerService;
private readonly IParserService<CsvObj> parserService;
private readonly CsvConfiguration parserConfig;
// Progress tracking fields
private long processedCount;
private long totalCount;
private string currentTask = string.Empty;
private Timer? progressTimer;
private readonly object progressLock = new object();
public CsvParserService(
ILogger<CsvParserService<CsvObj>> logger,
IParserService<CsvObj> parserService,
CsvFixerService csvFixerService)
{
this.logger = logger;
this.csvFixerService = csvFixerService;
this.parserService = parserService;
parserConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Delimiter = ";",
HasHeaderRecord = true,
PrepareHeaderForMatch = args => args.Header.ToLower(),
MissingFieldFound = null,
TrimOptions = TrimOptions.Trim,
Encoding = System.Text.Encoding.UTF8
};
}
public async Task ParseFolderAsync(string filePath)
{
logger.LogInformation($"ParseFolderAsync - Starting to parse '{filePath}'");
filePath = csvFixerService.FixCsvFile(filePath);
// Fix the CSV file if necessary
if (string.IsNullOrEmpty(filePath))
{
logger.LogError($"ParseFolderAsync - Failed to fix CSV file at '{filePath}'");
throw new InvalidOperationException($"Failed to fix CSV file at '{filePath}'");
}
try
{
using var reader = new StreamReader(filePath);
using var csv = new CsvReader(reader, parserConfig);
var po = new ParallelOptions
{
MaxDegreeOfParallelism = 40
};
//csv.Context.RegisterClassMap<ClassMap<CsvObj>>(); // optional for advanced mapping, not needed
var records = csv.GetRecords<CsvObj>().ToList();
StartProgressTracking($"Parsing {nameof(CsvObj)} - {Path.GetFileName(filePath)}", records.Count);
await Parallel.ForEachAsync(records, po, async (record, ct) =>
{
try
{
await parserService.ParseObject(record);
// Increment progress
IncrementProgress();
}
catch (Exception ex)
{
logger.LogError(ex, $"ParseFolderAsync - Error processing:");
IncrementProgress();
}
});
StopProgressTracking();
logger.LogInformation($"ParseFolderAsync - Finished parsing from {filePath}");
}
catch (Exception ex)
{
logger.LogError(ex, $"ParseFolderAsync - Error parsing file {filePath}");
throw;
}
}
// Progress tracking methods
private void StartProgressTracking(string taskName, long total)
{
lock (progressLock)
{
currentTask = taskName;
processedCount = 0;
totalCount = total;
progressTimer?.Dispose();
progressTimer = new Timer(LogProgress, null, TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(5));
logger.LogInformation("Progress - Task: {Task}, Total: {Total}", currentTask, totalCount);
}
}
private void IncrementProgress()
{
Interlocked.Increment(ref processedCount);
}
private void StopProgressTracking()
{
lock (progressLock)
{
progressTimer?.Dispose();
progressTimer = null;
// Log final progress
var percentage = totalCount > 0 ? (double)processedCount / totalCount * 100 : 0;
logger.LogInformation("Progress - Task: {Task}, Processed: {Processed}, Total: {Total}, Progress: {Percentage:F2}%",
currentTask, processedCount, totalCount, percentage);
}
}
private void LogProgress(object? state)
{
lock (progressLock)
{
if (string.IsNullOrEmpty(currentTask)) return;
var percentage = totalCount > 0 ? (double)processedCount / totalCount * 100 : 0;
logger.LogInformation("Progress - Task: {Task}, Processed: {Processed}, Total: {Total}, Progress: {Percentage:F2}%",
currentTask, processedCount, totalCount, percentage);
}
}
public void Dispose()
{
progressTimer?.Dispose();
}
}
}

View File

@ -2,116 +2,81 @@ using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using OpenCand.Config; using OpenCand.Config;
using OpenCand.Parser.Models;
using OpenCand.Parser.Services; using OpenCand.Parser.Services;
namespace OpenCand.Parser namespace OpenCand.Parser
{ {
public class ParserManager public class ParserManager
{ {
private readonly CsvParserService csvParserService;
private readonly ILogger<ParserManager> logger; private readonly ILogger<ParserManager> logger;
private readonly CsvSettings csvSettings; private readonly CsvSettings csvSettings;
private readonly IConfiguration configuration;
private readonly CsvParserService<CandidatoCSV> candidatoParserService;
private readonly CsvParserService<BemCandidatoCSV> bemCandidatoParserService;
private readonly CsvParserService<RedeSocialCSV> redeSocialParserService;
private readonly string BasePath;
public ParserManager( public ParserManager(
CsvParserService csvParserService,
IOptions<CsvSettings> csvSettings, IOptions<CsvSettings> csvSettings,
ILogger<ParserManager> logger, ILogger<ParserManager> logger,
IConfiguration configuration) IConfiguration configuration,
CsvParserService<CandidatoCSV> candidatoParserService,
CsvParserService<BemCandidatoCSV> bemCandidatoParserService,
CsvParserService<RedeSocialCSV> redeSocialParserService)
{ {
this.csvParserService = csvParserService;
this.logger = logger; this.logger = logger;
this.csvSettings = csvSettings.Value; this.csvSettings = csvSettings.Value;
this.configuration = configuration;
this.candidatoParserService = candidatoParserService;
this.bemCandidatoParserService = bemCandidatoParserService;
this.redeSocialParserService = redeSocialParserService;
// Get the base path from either SampleFolder in csvSettings or the BasePath in configuration
BasePath = configuration.GetValue<string>("BasePath") ?? string.Empty;
if (string.IsNullOrEmpty(BasePath))
{
throw new Exception("ParseFullDataAsync - BasePath is not configured in appsettings.json or CsvSettings.SampleFolder");
}
} }
public async Task ParseFullDataAsync() public async Task ParseFullDataAsync()
{ {
logger.LogInformation("ParseFullDataAsync - Starting parsing"); logger.LogInformation("ParseFullDataAsync - Starting parsing");
logger.LogInformation("ParseFullDataAsync - Processing will happen with BasePath: {BasePath}", BasePath);
// Get the base path from either SampleFolder in csvSettings or the BasePath in configuration var candidatosDirectory = Path.Combine(BasePath, csvSettings.CandidatosFolder);
var basePath = configuration.GetValue<string>("BasePath"); var bensCandidatosDirectory = Path.Combine(BasePath, csvSettings.BensCandidatosFolder);
var redesSociaisDirectory = Path.Combine(BasePath, csvSettings.RedesSociaisFolder);
if (string.IsNullOrEmpty(basePath)) //await ParseFolder(candidatosDirectory, candidatoParserService);
await ParseFolder(bensCandidatosDirectory, bemCandidatoParserService);
await ParseFolder(redesSociaisDirectory, redeSocialParserService);
logger.LogInformation("ParseFullDataAsync - Full data parsing completed!");
}
private async Task ParseFolder<CsvObj>(string csvDirectory, CsvParserService<CsvObj> csvParserService)
{
if (Directory.Exists(csvDirectory))
{ {
logger.LogError("ParseFullDataAsync - BasePath is not configured in appsettings.json or CsvSettings.SampleFolder"); foreach (var filePath in Directory.GetFiles(csvDirectory, "*.csv"))
return; {
// Check if filePath contains "fix_" prefix
if (filePath.Contains("fix_"))
{
logger.LogInformation("ParseFolder - Skipping already fixed file: {FilePath}", filePath);
continue;
}
logger.LogInformation("ParseFolder - Parsing data from {FilePath}", filePath);
await csvParserService.ParseFolderAsync(filePath);
}
} }
else
logger.LogInformation("ParseFullDataAsync - Processing will happen with BasePath: {BasePath}", basePath);
try
{ {
var candidatosDirectory = Path.Combine(basePath, csvSettings.CandidatosFolder); logger.LogWarning("ParseFolder - Directory not found at {Directory}", csvDirectory);
var bensCandidatosDirectory = Path.Combine(basePath, csvSettings.BensCandidatosFolder);
var redesSociaisDirectory = Path.Combine(basePath, csvSettings.RedesSociaisFolder);
if (Directory.Exists(candidatosDirectory))
{
foreach (var filePath in Directory.GetFiles(candidatosDirectory, "*.csv"))
{
// Check if filePath contains "fix_" prefix
if (filePath.Contains("fix_"))
{
logger.LogInformation("ParseFullDataAsync - Skipping already fixed file: {FilePath}", filePath);
continue;
}
logger.LogInformation("ParseFullDataAsync - Parsing candidatos data from {FilePath}", filePath);
await csvParserService.ParseCandidatosAsync(filePath);
}
}
else
{
logger.LogWarning("ParseFullDataAsync - 'Candidatos' directory not found at {Directory}", candidatosDirectory);
}
if (Directory.Exists(bensCandidatosDirectory))
{
foreach (var filePath in Directory.GetFiles(bensCandidatosDirectory, "*.csv"))
{
// Check if filePath contains "fix_" prefix
if (filePath.Contains("fix_"))
{
logger.LogInformation("ParseFullDataAsync - Skipping already fixed file: {FilePath}", filePath);
continue;
}
logger.LogInformation("ParseFullDataAsync - Parsing bens candidatos data from {FilePath}", filePath);
await csvParserService.ParseBensCandidatosAsync(filePath);
}
}
else
{
logger.LogWarning("ParseFullDataAsync - 'Bens candidatos' directory not found at {Directory}", bensCandidatosDirectory);
}
if (Directory.Exists(redesSociaisDirectory))
{
foreach (var filePath in Directory.GetFiles(redesSociaisDirectory, "*.csv"))
{
// Check if filePath contains "fix_" prefix
if (filePath.Contains("fix_"))
{
logger.LogInformation("ParseFullDataAsync - Skipping already fixed file: {FilePath}", filePath);
continue;
}
logger.LogInformation("ParseFullDataAsync - Parsing redes sociais data from {FilePath}", filePath);
await csvParserService.ParseRedeSocialAsync(filePath);
}
}
else
{
logger.LogWarning("ParseFullDataAsync - 'Redes sociais' directory not found at {Directory}", redesSociaisDirectory);
}
logger.LogInformation("ParseFullDataAsync - Full data parsing completed!");
}
catch (Exception ex)
{
logger.LogError(ex, "ParseFullDataAsync - Error parsing full data set");
throw;
} }
} }
} }

View File

@ -0,0 +1,52 @@
using Microsoft.Extensions.Logging;
using OpenCand.Core.Models;
using System.Globalization;
using OpenCand.ETL.Contracts;
using OpenCand.Parser.Models;
using OpenCand.Services;
using OpenCand.Parser.Services;
namespace OpenCand.ETL.Parser.ParserServices
{
public class BemCandidatoParserService : IParserService<BemCandidatoCSV>
{
private readonly ILogger<BemCandidatoParserService> logger;
private readonly BemCandidatoService bemCandidatoService;
public BemCandidatoParserService(
ILogger<BemCandidatoParserService> logger,
BemCandidatoService bemCandidatoService)
{
this.logger = logger;
this.bemCandidatoService = bemCandidatoService;
}
public async Task ParseObject(BemCandidatoCSV record)
{
// Parse decimal value
decimal? valor = null;
if (!string.IsNullOrEmpty(record.ValorBemCandidato))
{
string normalizedValue = record.ValorBemCandidato.Replace(".", "").Replace(",", ".");
if (decimal.TryParse(normalizedValue, NumberStyles.Any, CultureInfo.InvariantCulture, out var parsedValue))
{
valor = parsedValue;
}
}
var bemCandidato = new BemCandidato
{
SqCandidato = record.SequencialCandidato,
Ano = record.AnoEleicao,
SiglaUF = record.SiglaUF,
NomeUE = record.NomeUE,
OrdemBem = record.NumeroOrdemBemCandidato,
TipoBem = record.DescricaoTipoBemCandidato,
Descricao = record.DescricaoBemCandidato,
Valor = valor
};
await bemCandidatoService.AddBemCandidatoAsync(bemCandidato);
}
}
}

View File

@ -0,0 +1,100 @@
using System.Globalization;
using Microsoft.Extensions.Logging;
using OpenCand.Core.Models;
using OpenCand.ETL.Contracts;
using OpenCand.Parser.Models;
using OpenCand.Services;
namespace OpenCand.ETL.Parser.ParserServices
{
public class CandidatoParserService : IParserService<CandidatoCSV>
{
private readonly ILogger<CandidatoParserService> logger;
private readonly CandidatoService candidatoService;
public CandidatoParserService(
ILogger<CandidatoParserService> logger,
CandidatoService candidatoService)
{
this.logger = logger;
this.candidatoService = candidatoService;
}
public async Task ParseObject(CandidatoCSV record)
{
if (string.IsNullOrWhiteSpace(record.CPFCandidato) || record.CPFCandidato.Length <= 3)
{
record.CPFCandidato = null; // Handle null/empty/whitespace CPF
}
if (record.NomeCandidato == "NÃO DIVULGÁVEL" ||
string.IsNullOrEmpty(record.NomeCandidato) ||
record.NomeCandidato == "#NULO")
{
logger.LogCritical($"ParseCandidatosAsync - Candidate with id {record.SequencialCandidato} with invalid name, skipping...");
return; // Skip candidates with invalid name
}
if (string.IsNullOrWhiteSpace(record.Apelido) ||
record.Apelido.Contains("#NUL") ||
record.Apelido.Contains("NULO#") ||
record.Apelido.Contains("#NE"))
{
record.Apelido = null;
}
var candidato = new Candidato
{
Cpf = record.CPFCandidato,
SqCandidato = record.SequencialCandidato,
Nome = record.NomeCandidato,
Apelido = record.Apelido,
Email = record.Email.Contains("@") ? record.Email : null,
Sexo = record.Genero,
EstadoCivil = record.EstadoCivil,
Escolaridade = record.GrauInstrucao,
Ocupacao = record.Ocupacao,
Eleicoes = new List<CandidatoMapping>()
{
new CandidatoMapping
{
Cpf = record.CPFCandidato,
Nome = record.NomeCandidato,
Apelido = record.Apelido,
SqCandidato = record.SequencialCandidato,
Ano = record.AnoEleicao,
TipoEleicao = record.TipoAbrangencia,
NomeUE = record.NomeUE,
SiglaUF = record.SiglaUF,
Cargo = record.DescricaoCargo,
NrCandidato = record.NumeroCandidato,
Resultado = record.SituacaoTurno,
Partido = new Partido
{
Sigla = record.SiglaPartido,
Nome = record.NomePartido,
Numero = record.NumeroPartido,
}
}
}
};
if (!string.IsNullOrEmpty(record.DataNascimento) &&
record.DataNascimento != "#NULO")
{
if (DateTime.TryParseExact(record.DataNascimento, "dd/MM/yyyy",
CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out var dataNascimento))
{
// Convert to UTC DateTime to work with PostgreSQL timestamp with time zone
candidato.DataNascimento = DateTime.SpecifyKind(dataNascimento, DateTimeKind.Utc);
}
}
else
{
candidato.DataNascimento = null; // Handle null/empty/whitespace date
}
await candidatoService.AddCandidatoAsync(candidato);
}
}
}

View File

@ -0,0 +1,37 @@
using Microsoft.Extensions.Logging;
using OpenCand.Core.Models;
using OpenCand.ETL.Contracts;
using OpenCand.Parser.Models;
using OpenCand.Parser.Services;
using OpenCand.Services;
namespace OpenCand.ETL.Parser.ParserServices
{
public class RedeSocialParserService : IParserService<RedeSocialCSV>
{
private readonly ILogger<RedeSocialParserService> logger;
private readonly RedeSocialService redeSocialService;
public RedeSocialParserService(
ILogger<RedeSocialParserService> logger,
RedeSocialService redeSocialService)
{
this.logger = logger;
this.redeSocialService = redeSocialService;
}
public async Task ParseObject(RedeSocialCSV record)
{
var redeSocial = new RedeSocial
{
SqCandidato = record.SequencialCandidato,
Ano = record.DataEleicao,
SiglaUF = record.SiglaUF,
Link = record.Url,
Rede = string.Empty
};
await redeSocialService.AddRedeSocialAsync(redeSocial);
}
}
}

View File

@ -1,276 +0,0 @@
using System.Globalization;
using CsvHelper;
using CsvHelper.Configuration;
using Microsoft.Extensions.Logging;
using OpenCand.Core.Models;
using OpenCand.ETL.Parser.CsvMappers;
using OpenCand.Parser.CsvMappers;
using OpenCand.Parser.Models;
using OpenCand.Services;
namespace OpenCand.Parser.Services
{
public class CsvParserService
{
private readonly ILogger<CsvParserService> logger;
private readonly CandidatoService candidatoService;
private readonly BemCandidatoService bemCandidatoService;
private readonly RedeSocialService redeSocialService;
private readonly CsvFixerService csvFixerService;
private readonly CsvConfiguration parserConfig;
public CsvParserService(
ILogger<CsvParserService> logger,
CandidatoService candidatoService,
BemCandidatoService bemCandidatoService,
RedeSocialService redeSocialService,
CsvFixerService csvFixerService)
{
this.logger = logger;
this.candidatoService = candidatoService;
this.bemCandidatoService = bemCandidatoService;
this.redeSocialService = redeSocialService;
this.csvFixerService = csvFixerService;
parserConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Delimiter = ";",
HasHeaderRecord = true,
PrepareHeaderForMatch = args => args.Header.ToLower(),
MissingFieldFound = null,
TrimOptions = TrimOptions.Trim,
Encoding = System.Text.Encoding.UTF8
};
}
public async Task ParseCandidatosAsync(string filePath)
{
logger.LogInformation($"ParseCandidatosAsync - Starting to parse 'candidatos' from '{filePath}'");
filePath = csvFixerService.FixCsvFile(filePath);
// Fix the CSV file if necessary
if (string.IsNullOrEmpty(filePath))
{
logger.LogError($"ParseCandidatosAsync - Failed to fix CSV file at '{filePath}'");
throw new InvalidOperationException($"Failed to fix CSV file at '{filePath}'");
}
try
{
using var reader = new StreamReader(filePath);
using var csv = new CsvReader(reader, parserConfig);
var po = new ParallelOptions
{
MaxDegreeOfParallelism = 25
};
csv.Context.RegisterClassMap<CandidatoMap>();
var records = csv.GetRecords<CandidatoCSV>();
await Parallel.ForEachAsync(records, po, async (record, ct) =>
{
try
{
if (string.IsNullOrWhiteSpace(record.CPFCandidato) || record.CPFCandidato.Length <= 3)
{
record.CPFCandidato = null; // Handle null/empty/whitespace CPF
}
if (record.NomeCandidato == "NÃO DIVULGÁVEL" ||
string.IsNullOrEmpty(record.NomeCandidato) ||
record.NomeCandidato == "#NULO")
{
logger.LogCritical($"ParseCandidatosAsync - Candidate with id {record.SequencialCandidato} with invalid name, skipping...");
return; // Skip candidates with invalid name
}
var candidato = new Candidato
{
Cpf = record.CPFCandidato,
SqCandidato = record.SequencialCandidato,
Nome = record.NomeCandidato,
Apelido = record.Apelido,
Email = record.Email.Contains("@") ? record.Email : null,
Sexo = record.Genero,
EstadoCivil = record.EstadoCivil,
Escolaridade = record.GrauInstrucao,
Ocupacao = record.Ocupacao,
Eleicoes = new List<CandidatoMapping>()
{
new CandidatoMapping
{
Cpf = record.CPFCandidato,
Nome = record.NomeCandidato,
Apelido = record.Apelido,
SqCandidato = record.SequencialCandidato,
Ano = record.AnoEleicao,
TipoEleicao = record.TipoAbrangencia,
NomeUE = record.NomeUE,
SiglaUF = record.SiglaUF,
Cargo = record.DescricaoCargo,
NrCandidato = record.NumeroCandidato,
Resultado = record.SituacaoTurno,
Partido = new Partido
{
Sigla = record.SiglaPartido,
Nome = record.NomePartido,
Numero = record.NumeroPartido,
}
}
}
};
if (!string.IsNullOrEmpty(record.DataNascimento) &&
record.DataNascimento != "#NULO")
{
if (DateTime.TryParseExact(record.DataNascimento, "dd/MM/yyyy",
CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out var dataNascimento))
{
// Convert to UTC DateTime to work with PostgreSQL timestamp with time zone
candidato.DataNascimento = DateTime.SpecifyKind(dataNascimento, DateTimeKind.Utc);
}
}
else
{
candidato.DataNascimento = null; // Handle null/empty/whitespace date
}
await candidatoService.AddCandidatoAsync(candidato);
}
catch (Exception ex)
{
logger.LogError(ex, "ParseCandidatosAsync - Error processing candidate with id {CandidatoId}", record.SequencialCandidato);
}
});
logger.LogInformation("ParseCandidatosAsync - Finished parsing candidatos from {FilePath}", filePath);
}
catch (Exception ex)
{
logger.LogError(ex, "ParseCandidatosAsync - Error parsing candidatos file {FilePath}", filePath);
throw;
}
}
public async Task ParseBensCandidatosAsync(string filePath)
{
logger.LogInformation($"ParseBensCandidatosAsync - Starting to parse bens candidatos from '{filePath}'");
filePath = csvFixerService.FixCsvFile(filePath);
// Fix the CSV file if necessary
if (string.IsNullOrEmpty(filePath))
{
logger.LogError($"ParseBensCandidatosAsync - Failed to fix CSV file at '{filePath}'");
throw new InvalidOperationException($"Failed to fix CSV file at '{filePath}'");
}
try
{
using var reader = new StreamReader(filePath);
using var csv = new CsvReader(reader, parserConfig);
csv.Context.RegisterClassMap<BemCandidatoMap>();
var records = csv.GetRecords<BemCandidatoCSV>();
foreach (var record in records)
{
try
{
// Parse decimal value
decimal? valor = null;
if (!string.IsNullOrEmpty(record.ValorBemCandidato))
{
string normalizedValue = record.ValorBemCandidato.Replace(".", "").Replace(",", ".");
if (decimal.TryParse(normalizedValue, NumberStyles.Any, CultureInfo.InvariantCulture, out var parsedValue))
{
valor = parsedValue;
}
}
var bemCandidato = new BemCandidato
{
SqCandidato = record.SequencialCandidato,
Ano = record.AnoEleicao,
SiglaUF = record.SiglaUF,
NomeUE = record.NomeUE,
OrdemBem = record.NumeroOrdemBemCandidato,
TipoBem = record.DescricaoTipoBemCandidato,
Descricao = record.DescricaoBemCandidato,
Valor = valor
};
await bemCandidatoService.AddBemCandidatoAsync(bemCandidato);
}
catch (Exception ex)
{
logger.LogError(ex, "ParseBensCandidatosAsync - Error processing bem candidato with id {CandidatoId} and ordem {OrdemBem}",
record.SequencialCandidato, record.NumeroOrdemBemCandidato);
}
}
logger.LogInformation("ParseBensCandidatosAsync - Finished parsing bens candidatos from {FilePath}", filePath);
}
catch (Exception ex)
{
logger.LogError(ex, "ParseBensCandidatosAsync - Error parsing bens candidatos file {FilePath}", filePath);
throw;
}
}
public async Task ParseRedeSocialAsync(string filePath)
{
logger.LogInformation($"ParseRedeSocialAsync - Starting to parse redes sociais from '{filePath}'");
filePath = csvFixerService.FixCsvFile(filePath);
// Fix the CSV file if necessary
if (string.IsNullOrEmpty(filePath))
{
logger.LogError($"ParseRedeSocialAsync - Failed to fix CSV file at '{filePath}'");
throw new InvalidOperationException($"Failed to fix CSV file at '{filePath}'");
}
try
{
using var reader = new StreamReader(filePath);
using var csv = new CsvReader(reader, parserConfig);
csv.Context.RegisterClassMap<RedeSocialMap>();
var records = csv.GetRecords<RedeSocialCSV>();
foreach (var record in records)
{
try
{
var redeSocial = new RedeSocial
{
SqCandidato = record.SequencialCandidato,
Ano = record.DataEleicao,
SiglaUF = record.SiglaUF,
Link = record.Url,
Rede = string.Empty
};
await redeSocialService.AddRedeSocialAsync(redeSocial);
}
catch (Exception ex)
{
logger.LogError(ex, "ParseRedeSocialAsync - Error processing redes sociais with id {SequencialCandidato} and link {Url}",
record.SequencialCandidato, record.Url);
}
}
logger.LogInformation("ParseRedeSocialAsync - Finished parsing redes sociais from {FilePath}", filePath);
}
catch (Exception ex)
{
logger.LogError(ex, "ParseRedeSocialAsync - Error parsing redes sociais file {FilePath}", filePath);
throw;
}
}
}
}

View File

@ -3,8 +3,11 @@ using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using OpenCand.Config; using OpenCand.Config;
using OpenCand.ETL.Contracts;
using OpenCand.ETL.Parser.ParserServices;
using OpenCand.ETL.Repository; using OpenCand.ETL.Repository;
using OpenCand.Parser; using OpenCand.Parser;
using OpenCand.Parser.Models;
using OpenCand.Parser.Services; using OpenCand.Parser.Services;
using OpenCand.Repository; using OpenCand.Repository;
using OpenCand.Services; using OpenCand.Services;
@ -52,9 +55,14 @@ namespace OpenCand
{ {
// Configuration // Configuration
services.Configure<CsvSettings>(hostContext.Configuration.GetSection("CsvSettings")); services.Configure<CsvSettings>(hostContext.Configuration.GetSection("CsvSettings"));
// Services // Services
services.AddTransient<CsvParserService>(); services.AddTransient<IParserService<CandidatoCSV>, CandidatoParserService>();
services.AddTransient<IParserService<BemCandidatoCSV>, BemCandidatoParserService>();
services.AddTransient<IParserService<RedeSocialCSV>, RedeSocialParserService>();
services.AddTransient<CsvParserService<CandidatoCSV>>();
services.AddTransient<CsvParserService<BemCandidatoCSV>>();
services.AddTransient<CsvParserService<RedeSocialCSV>>();
services.AddTransient<ParserManager>(); services.AddTransient<ParserManager>();
services.AddTransient<CandidatoService>(); services.AddTransient<CandidatoService>();
services.AddTransient<BemCandidatoService>(); services.AddTransient<BemCandidatoService>();

View File

@ -50,7 +50,8 @@ namespace OpenCand.Repository
{ {
await connection.ExecuteAsync(@" await connection.ExecuteAsync(@"
INSERT INTO candidato_mapping (idcandidato, cpf, nome, apelido, sqcandidato, ano, tipoeleicao, siglauf, nomeue, cargo, nrcandidato, sgpartido, resultado) INSERT INTO candidato_mapping (idcandidato, cpf, nome, apelido, sqcandidato, ano, tipoeleicao, siglauf, nomeue, cargo, nrcandidato, sgpartido, resultado)
VALUES (@idcandidato, @cpf, @nome, @apelido, @sqcandidato, @ano, @tipoeleicao, @siglauf, @nomeue, @cargo, @nrcandidato, @sgpartido, @resultado);", VALUES (@idcandidato, @cpf, @nome, @apelido, @sqcandidato, @ano, @tipoeleicao, @siglauf, @nomeue, @cargo, @nrcandidato, @sgpartido, @resultado)
ON CONFLICT DO NOTHING;",
new new
{ {
idcandidato = candidatoMapping.IdCandidato, idcandidato = candidatoMapping.IdCandidato,
@ -70,27 +71,45 @@ namespace OpenCand.Repository
} }
} }
public async Task<List<CandidatoMapping>?> GetCandidatoMappingByCpf(string cpf) public async Task<Candidato?> GetCandidatoByCpf(string cpf)
{ {
using (var connection = new NpgsqlConnection(ConnectionString)) using (var connection = new NpgsqlConnection(ConnectionString))
{ {
var query = @" var query = @"
SELECT idcandidato, cpf, nome, sqcandidato, ano, tipoeleicao, siglauf, nomeue, cargo, nrcandidato, resultado SELECT *
FROM candidato_mapping FROM candidato
WHERE cpf = @cpf"; WHERE cpf = @cpf";
return (await connection.QueryAsync<CandidatoMapping>(query, new { cpf })).AsList(); return await connection.QueryFirstOrDefaultAsync<Candidato>(query, new { cpf });
} }
} }
public async Task<List<CandidatoMapping>?> GetCandidatoMappingByNome(string nome, string sqcandidato, int ano, string siglauf, string nomeue, string nrcandidato) public async Task<Candidato?> GetCandidatoByNome(string nome, DateTime datanascimento)
{ {
using (var connection = new NpgsqlConnection(ConnectionString)) using (var connection = new NpgsqlConnection(ConnectionString))
{ {
var query = @" var query = @"
SELECT idcandidato, cpf, nome, sqcandidato, ano, tipoeleicao, siglauf, nomeue, cargo, nrcandidato, resultado SELECT *
FROM candidato
WHERE nome = @nome AND datanascimento = @datanascimento";
return await connection.QueryFirstOrDefaultAsync<Candidato>(query, new { nome, datanascimento });
}
}
public async Task<CandidatoMapping?> GetCandidatoMappingByDetails(string nome, int ano, string cargo, string siglauf, string nomeue, string nrcandidato, string resultado)
{
using (var connection = new NpgsqlConnection(ConnectionString))
{
var query = @"
SELECT *
FROM candidato_mapping FROM candidato_mapping
WHERE nome = @nome AND sqcandidato = @sqcandidato AND ano = @ano AND siglauf = @siglauf AND nomeue = @nomeue AND nrcandidato = @nrcandidato"; WHERE nome = @nome AND
return (await connection.QueryAsync<CandidatoMapping>(query, new { nome, sqcandidato, ano, siglauf, nomeue, nrcandidato })).AsList(); ano = @ano AND
cargo = @cargo AND
siglauf = @siglauf AND
nomeue = @nomeue AND
nrcandidato = @nrcandidato AND
resultado = @resultado";
return await connection.QueryFirstOrDefaultAsync<CandidatoMapping>(query, new { nome, ano, cargo, siglauf, nomeue, nrcandidato, resultado });
} }
} }

View File

@ -35,57 +35,59 @@ namespace OpenCand.Services
await partidoRepository.AddPartidoAsync(candidatoMapping.Partido); await partidoRepository.AddPartidoAsync(candidatoMapping.Partido);
} }
List<CandidatoMapping>? mappings = null; // Check if the candidate already exists in the database
CandidatoMapping? existingMapping = null; Candidato? existingCandidato;
if (candidato.Cpf == null || candidato.Cpf.Length != 11) if (candidato.Cpf == null || candidato.Cpf.Length != 11)
{ {
// If CPF is not provided or invalid, we STRICTLY search by name and other properties existingCandidato = await candidatoRepository.GetCandidatoByNome(candidato.Nome, candidato.DataNascimento.GetValueOrDefault());
mappings = await candidatoRepository.GetCandidatoMappingByNome(candidato.Nome,
candidato.SqCandidato,
candidatoMapping.Ano,
candidatoMapping.SiglaUF,
candidatoMapping.NomeUE,
candidatoMapping.NrCandidato);
} }
else else
{ {
mappings = await candidatoRepository.GetCandidatoMappingByCpf(candidato.Cpf); existingCandidato = await candidatoRepository.GetCandidatoByCpf(candidato.Cpf);
} }
// If the candidate already exists, we can update the mappings
if (existingCandidato != null)
{
candidato.IdCandidato = existingCandidato.IdCandidato;
candidato.Cpf = GetNonEmptyString(existingCandidato.Cpf, candidato.Cpf);
candidato.Email = GetNonEmptyString(existingCandidato.Email, candidato.Email);
candidato.EstadoCivil = GetNonEmptyString(existingCandidato.EstadoCivil, candidato.EstadoCivil);
candidato.Apelido = GetNonEmptyString(existingCandidato.Apelido, candidato.Apelido);
candidato.Escolaridade = GetNonEmptyString(existingCandidato.Escolaridade, candidato.Escolaridade);
candidato.Ocupacao = GetNonEmptyString(existingCandidato.Ocupacao, candidato.Ocupacao);
candidato.Sexo = GetNonEmptyString(existingCandidato.Sexo, candidato.Sexo);
candidatoMapping.IdCandidato = candidato.IdCandidato;
candidatoMapping.Cpf = GetNonEmptyString(candidato.Cpf, candidatoMapping.Cpf);
// Update the entries for the existing candidate
await candidatoRepository.AddCandidatoAsync(candidato);
await candidatoRepository.AddCandidatoMappingAsync(candidatoMapping);
return;
}
// If the candidate does not exist, we create a new one
CandidatoMapping? existingMapping = await candidatoRepository.GetCandidatoMappingByDetails(candidato.Nome,
candidatoMapping.Ano,
candidatoMapping.Cargo,
candidatoMapping.SiglaUF,
candidatoMapping.NomeUE,
candidatoMapping.NrCandidato,
candidatoMapping.Resultado);
// Check if exists // Check if exists
if (mappings != null && mappings.Count > 0) if (existingMapping != null)
{ {
existingMapping = mappings.FirstOrDefault(m => m.Ano == candidatoMapping.Ano && candidato.IdCandidato = existingMapping.IdCandidato;
m.Cargo == candidatoMapping.Cargo && candidato.Cpf = GetNonEmptyString(existingMapping.Cpf, candidato.Cpf);
m.SiglaUF == candidatoMapping.SiglaUF && candidato.Apelido = GetNonEmptyString(existingMapping.Apelido, candidato.Apelido);
m.NomeUE == candidatoMapping.NomeUE && await candidatoRepository.AddCandidatoAsync(candidato);
m.NrCandidato == candidatoMapping.NrCandidato &&
m.Resultado == candidatoMapping.Resultado);
// Already exists one for the current election return;
if (existingMapping != null) }
{
candidato.IdCandidato = existingMapping.IdCandidato;
candidato.Cpf = existingMapping.Cpf;
await candidatoRepository.AddCandidatoAsync(candidato); // No current mapping, we create a new one and create a new mapping for the current election
return; candidato.IdCandidato = Guid.NewGuid();
}
// If exists (but not for the current election), we take the existing idcandidato
// and create a new mapping for the current election
else
{
existingMapping = mappings.First();
candidato.IdCandidato = existingMapping.IdCandidato;
candidato.Cpf = existingMapping.Cpf;
}
}
else
{
// No current mapping, we create a new one
// and create a new mapping for the current election
candidato.IdCandidato = Guid.NewGuid();
}
// Set the mapping properties // Set the mapping properties
candidatoMapping.IdCandidato = candidato.IdCandidato; candidatoMapping.IdCandidato = candidato.IdCandidato;
@ -96,5 +98,18 @@ namespace OpenCand.Services
await candidatoRepository.AddCandidatoMappingAsync(candidatoMapping); await candidatoRepository.AddCandidatoMappingAsync(candidatoMapping);
} }
public string GetNonEmptyString(string? value1, string? value2)
{
if (!string.IsNullOrWhiteSpace(value1))
{
return value1;
}
else if (!string.IsNullOrWhiteSpace(value2))
{
return value2;
}
return string.Empty;
}
} }
} }

View File

@ -31,7 +31,7 @@ CREATE TABLE candidato_mapping (
siglauf VARCHAR(2), siglauf VARCHAR(2),
nomeue VARCHAR(100), nomeue VARCHAR(100),
cargo VARCHAR(50), cargo VARCHAR(50),
sgpartido VARCHAR(10), sgpartido VARCHAR(50),
nrcandidato VARCHAR(20), nrcandidato VARCHAR(20),
resultado VARCHAR(50), resultado VARCHAR(50),
CONSTRAINT pk_candidato_mapping PRIMARY KEY (idcandidato, ano, siglauf, nomeue, cargo, nrcandidato, resultado), CONSTRAINT pk_candidato_mapping PRIMARY KEY (idcandidato, ano, siglauf, nomeue, cargo, nrcandidato, resultado),
@ -71,7 +71,7 @@ CREATE INDEX idx_rede_social_idcandidato ON rede_social (idcandidato);
---- Table for storing party information ---- Table for storing party information
CREATE TABLE partido ( CREATE TABLE partido (
sigla VARCHAR(10) NOT NULL PRIMARY KEY, sigla VARCHAR(50) NOT NULL PRIMARY KEY,
nome VARCHAR(255) NOT NULL, nome VARCHAR(255) NOT NULL,
numero INT NOT NULL numero INT NOT NULL
); );