277 lines
12 KiB
C#
277 lines
12 KiB
C#
using System.Globalization;
|
|
using CsvHelper;
|
|
using CsvHelper.Configuration;
|
|
using Microsoft.Extensions.Logging;
|
|
using OpenCand.Core.Models;
|
|
using OpenCand.ETL.Parser.CsvMappers;
|
|
using OpenCand.Parser.CsvMappers;
|
|
using OpenCand.Parser.Models;
|
|
using OpenCand.Services;
|
|
|
|
namespace OpenCand.Parser.Services
|
|
{
|
|
/// <summary>
/// Reads semicolon-delimited CSV files (candidates, candidate assets and social-network links),
/// maps every row to its domain model and forwards it to the corresponding service.
/// Each file is first passed through <see cref="CsvFixerService.FixCsvFile"/>; the path that call
/// returns is the one actually parsed.
/// </summary>
public class CsvParserService
{
    // Upper bound on candidate rows processed concurrently by ParseCandidatosAsync.
    private const int MaxCandidatoParallelism = 25;

    // Sentinel strings this parser treats as "no usable value".
    private const string NullPlaceholder = "#NULO";
    private const string UndisclosedName = "NÃO DIVULGÁVEL";

    // Date layout expected in the birth-date column.
    private const string BirthDateFormat = "dd/MM/yyyy";

    private readonly ILogger<CsvParserService> logger;
    private readonly CandidatoService candidatoService;
    private readonly BemCandidatoService bemCandidatoService;
    private readonly RedeSocialService redeSocialService;
    private readonly CsvFixerService csvFixerService;

    private readonly CsvConfiguration parserConfig;

    /// <summary>
    /// Stores the collaborating services and builds the CsvHelper configuration shared by all parse methods.
    /// </summary>
    public CsvParserService(
        ILogger<CsvParserService> logger,
        CandidatoService candidatoService,
        BemCandidatoService bemCandidatoService,
        RedeSocialService redeSocialService,
        CsvFixerService csvFixerService)
    {
        this.logger = logger;
        this.candidatoService = candidatoService;
        this.bemCandidatoService = bemCandidatoService;
        this.redeSocialService = redeSocialService;
        this.csvFixerService = csvFixerService;

        parserConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            Delimiter = ";",
            HasHeaderRecord = true,
            // Invariant lower-casing keeps header matching independent of the machine culture
            // (e.g. the Turkish dotless-i rule would otherwise break headers containing 'I').
            PrepareHeaderForMatch = args => args.Header.ToLowerInvariant(),
            // Missing columns are tolerated; the mapped member keeps its default value.
            MissingFieldFound = null,
            TrimOptions = TrimOptions.Trim,
            Encoding = System.Text.Encoding.UTF8
        };
    }

    /// <summary>
    /// Parses a candidates CSV file and sends every valid row to <c>CandidatoService.AddCandidatoAsync</c>.
    /// Rows are processed concurrently (up to <see cref="MaxCandidatoParallelism"/> at a time).
    /// A failure on a single row is logged and does not stop the run.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to parse.</param>
    /// <exception cref="InvalidOperationException">The CSV fixer returned a null or empty path.</exception>
    public async Task ParseCandidatosAsync(string filePath)
    {
        logger.LogInformation("ParseCandidatosAsync - Starting to parse 'candidatos' from '{FilePath}'", filePath);

        filePath = FixCsvFileOrThrow(nameof(ParseCandidatosAsync), filePath);

        try
        {
            using var reader = new StreamReader(filePath);
            using var csv = new CsvReader(reader, parserConfig);
            csv.Context.RegisterClassMap<CandidatoMap>();

            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxCandidatoParallelism
            };

            var records = csv.GetRecords<CandidatoCSV>();

            await Parallel.ForEachAsync(records, parallelOptions, async (record, _) =>
            {
                await ProcessCandidatoAsync(record);
            });

            logger.LogInformation("ParseCandidatosAsync - Finished parsing candidatos from {FilePath}", filePath);
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "ParseCandidatosAsync - Error parsing candidatos file {FilePath}", filePath);
            throw;
        }
    }

    /// <summary>
    /// Parses a candidate-assets CSV file and sends every row to <c>BemCandidatoService.AddBemCandidatoAsync</c>.
    /// Rows are processed sequentially. A failure on a single row is logged and does not stop the run.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to parse.</param>
    /// <exception cref="InvalidOperationException">The CSV fixer returned a null or empty path.</exception>
    public async Task ParseBensCandidatosAsync(string filePath)
    {
        logger.LogInformation("ParseBensCandidatosAsync - Starting to parse bens candidatos from '{FilePath}'", filePath);

        filePath = FixCsvFileOrThrow(nameof(ParseBensCandidatosAsync), filePath);

        try
        {
            using var reader = new StreamReader(filePath);
            using var csv = new CsvReader(reader, parserConfig);
            csv.Context.RegisterClassMap<BemCandidatoMap>();

            var records = csv.GetRecords<BemCandidatoCSV>();

            foreach (var record in records)
            {
                try
                {
                    var bemCandidato = new BemCandidato
                    {
                        SqCandidato = record.SequencialCandidato,
                        Ano = record.AnoEleicao,
                        SiglaUF = record.SiglaUF,
                        NomeUE = record.NomeUE,
                        OrdemBem = record.NumeroOrdemBemCandidato,
                        TipoBem = record.DescricaoTipoBemCandidato,
                        Descricao = record.DescricaoBemCandidato,
                        Valor = ParseValorBem(record.ValorBemCandidato)
                    };

                    await bemCandidatoService.AddBemCandidatoAsync(bemCandidato);
                }
                catch (Exception ex)
                {
                    logger.LogError(ex, "ParseBensCandidatosAsync - Error processing bem candidato with id {CandidatoId} and ordem {OrdemBem}",
                        record.SequencialCandidato, record.NumeroOrdemBemCandidato);
                }
            }

            logger.LogInformation("ParseBensCandidatosAsync - Finished parsing bens candidatos from {FilePath}", filePath);
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "ParseBensCandidatosAsync - Error parsing bens candidatos file {FilePath}", filePath);
            throw;
        }
    }

    /// <summary>
    /// Parses a social-network CSV file and sends every row to <c>RedeSocialService.AddRedeSocialAsync</c>.
    /// Rows are processed sequentially. A failure on a single row is logged and does not stop the run.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to parse.</param>
    /// <exception cref="InvalidOperationException">The CSV fixer returned a null or empty path.</exception>
    public async Task ParseRedeSocialAsync(string filePath)
    {
        logger.LogInformation("ParseRedeSocialAsync - Starting to parse redes sociais from '{FilePath}'", filePath);

        filePath = FixCsvFileOrThrow(nameof(ParseRedeSocialAsync), filePath);

        try
        {
            using var reader = new StreamReader(filePath);
            using var csv = new CsvReader(reader, parserConfig);
            csv.Context.RegisterClassMap<RedeSocialMap>();

            var records = csv.GetRecords<RedeSocialCSV>();

            foreach (var record in records)
            {
                try
                {
                    var redeSocial = new RedeSocial
                    {
                        SqCandidato = record.SequencialCandidato,
                        // NOTE(review): the year is taken from a member named DataEleicao — confirm the mapping is intended.
                        Ano = record.DataEleicao,
                        SiglaUF = record.SiglaUF,
                        Link = record.Url,
                        // The network name is not derived here; it is always passed on empty.
                        Rede = string.Empty
                    };

                    await redeSocialService.AddRedeSocialAsync(redeSocial);
                }
                catch (Exception ex)
                {
                    logger.LogError(ex, "ParseRedeSocialAsync - Error processing redes sociais with id {SequencialCandidato} and link {Url}",
                        record.SequencialCandidato, record.Url);
                }
            }

            logger.LogInformation("ParseRedeSocialAsync - Finished parsing redes sociais from {FilePath}", filePath);
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "ParseRedeSocialAsync - Error parsing redes sociais file {FilePath}", filePath);
            throw;
        }
    }

    // Runs the CSV fixer and returns the path to parse; logs and throws when the fixer yields no path.
    // The ORIGINAL path is kept for the messages so the failure can be traced back to a file.
    private string FixCsvFileOrThrow(string operation, string filePath)
    {
        var fixedPath = csvFixerService.FixCsvFile(filePath);

        if (string.IsNullOrEmpty(fixedPath))
        {
            logger.LogError("{Operation} - Failed to fix CSV file at '{FilePath}'", operation, filePath);
            throw new InvalidOperationException($"Failed to fix CSV file at '{filePath}'");
        }

        return fixedPath;
    }

    // Validates one candidate row, maps it and hands it to the candidate service; never throws.
    private async Task ProcessCandidatoAsync(CandidatoCSV record)
    {
        try
        {
            var hasInvalidName = string.IsNullOrEmpty(record.NomeCandidato)
                || record.NomeCandidato == UndisclosedName
                || record.NomeCandidato == NullPlaceholder;

            if (hasInvalidName)
            {
                logger.LogCritical("ParseCandidatosAsync - Candidate with id {CandidatoId} with invalid name, skipping...", record.SequencialCandidato);
                return; // Skip candidates with invalid name
            }

            // A blank CPF, or one of three characters or fewer, is stored as null.
            if (string.IsNullOrWhiteSpace(record.CPFCandidato) || record.CPFCandidato.Length <= 3)
            {
                record.CPFCandidato = null;
            }

            var candidato = MapCandidato(record);

            var hasBirthDate = !string.IsNullOrEmpty(record.DataNascimento)
                && record.DataNascimento != NullPlaceholder;

            if (!hasBirthDate)
            {
                candidato.DataNascimento = null;
            }
            else if (DateTime.TryParseExact(record.DataNascimento, BirthDateFormat,
                CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out var dataNascimento))
            {
                // SpecifyKind only relabels the value as UTC; the calendar date and time are not shifted.
                // The original comment cites PostgreSQL "timestamp with time zone" as the reason for the UTC kind.
                candidato.DataNascimento = DateTime.SpecifyKind(dataNascimento, DateTimeKind.Utc);
            }

            await candidatoService.AddCandidatoAsync(candidato);
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "ParseCandidatosAsync - Error processing candidate with id {CandidatoId}", record.SequencialCandidato);
        }
    }

    // Builds the Candidato (with its single election mapping and party) from one CSV row.
    private static Candidato MapCandidato(CandidatoCSV record)
    {
        // Email may be null when the column is absent (MissingFieldFound is disabled),
        // so the check must be null-safe; values without '@' are discarded.
        var email = record.Email?.Contains("@") == true ? record.Email : null;

        return new Candidato
        {
            Cpf = record.CPFCandidato,
            SqCandidato = record.SequencialCandidato,
            Nome = record.NomeCandidato,
            Apelido = record.Apelido,
            Email = email,
            Sexo = record.Genero,
            EstadoCivil = record.EstadoCivil,
            Escolaridade = record.GrauInstrucao,
            Ocupacao = record.Ocupacao,
            Eleicoes = new List<CandidatoMapping>()
            {
                new CandidatoMapping
                {
                    Cpf = record.CPFCandidato,
                    Nome = record.NomeCandidato,
                    Apelido = record.Apelido,
                    SqCandidato = record.SequencialCandidato,
                    Ano = record.AnoEleicao,
                    TipoEleicao = record.TipoAbrangencia,
                    NomeUE = record.NomeUE,
                    SiglaUF = record.SiglaUF,
                    Cargo = record.DescricaoCargo,
                    NrCandidato = record.NumeroCandidato,
                    Resultado = record.SituacaoTurno,
                    Partido = new Partido
                    {
                        Sigla = record.SiglaPartido,
                        Nome = record.NomePartido,
                        Numero = record.NumeroPartido,
                    }
                }
            }
        };
    }

    // Converts an asset value written with '.' as group separator and ',' as decimal separator
    // (e.g. "1.234,56") into a decimal; returns null for empty or unparsable input.
    private static decimal? ParseValorBem(string rawValue)
    {
        if (string.IsNullOrEmpty(rawValue))
        {
            return null;
        }

        var normalizedValue = rawValue.Replace(".", "").Replace(",", ".");

        return decimal.TryParse(normalizedValue, NumberStyles.Any, CultureInfo.InvariantCulture, out var parsedValue)
            ? parsedValue
            : null;
    }
}
|
|
}
|