This commit is contained in:
2025-05-31 10:58:30 -03:00
commit 1cb7645910
48 changed files with 2235 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
namespace OpenCand.Config
{
/// <summary>
/// Folder names of the CSV data sets, bound from the "CsvSettings" configuration section.
/// Each value is combined with the configured base path by the parser manager.
/// </summary>
public class CsvSettings
{
    /// <summary>Folder containing the candidate CSV files; empty until configured.</summary>
    public string CandidatosFolder { get; set; } = "";

    /// <summary>Folder containing the candidate assets CSV files; empty until configured.</summary>
    public string BensCandidatosFolder { get; set; } = "";

    /// <summary>Folder containing the social network CSV files; empty until configured.</summary>
    public string RedesSociaisFolder { get; set; } = "";
}
}

View File

@@ -0,0 +1,29 @@
<!-- Executable project targeting .NET 8 with implicit usings and nullable reference types enabled. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!-- NuGet dependencies. System.Text.Encoding.CodePages backs the code page 1252 encoding used when reading the source CSV files. -->
<ItemGroup>
<PackageReference Include="CsvHelper" Version="33.0.1" />
<PackageReference Include="Dapper" Version="2.1.66" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="9.0.5" />
<PackageReference Include="Npgsql" Version="8.0.2" />
<PackageReference Include="System.Text.Encoding.CodePages" Version="9.0.5" />
</ItemGroup>
<!-- Shared project that provides the OpenCand.Core.Models types used by the parser, services and repositories. -->
<ItemGroup>
<ProjectReference Include="..\OpenCand.Core\OpenCand.Core.csproj" />
</ItemGroup>
<!-- Settings files are copied next to the binary on every build so the host can load them at run time. -->
<ItemGroup>
<None Update="appsettings.Development.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="appsettings.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,15 @@
using CsvHelper.Configuration;
using OpenCand.Parser.Models;
using System.Globalization;
namespace OpenCand.Parser.CsvMappers
{
/// <summary>
/// CsvHelper class map for <see cref="BemCandidatoCSV"/>, generated with <c>AutoMap</c>
/// using the invariant culture.
/// </summary>
public class BemCandidatoMap : ClassMap<BemCandidatoCSV>
{
    /// <summary>
    /// Builds the map through auto-mapping; no explicit member mappings are declared.
    /// </summary>
    public BemCandidatoMap() => AutoMap(CultureInfo.InvariantCulture);
}
}

View File

@@ -0,0 +1,15 @@
using CsvHelper.Configuration;
using OpenCand.Parser.Models;
using System.Globalization;
namespace OpenCand.Parser.CsvMappers
{
/// <summary>
/// CsvHelper class map for <see cref="CandidatoCSV"/>, generated with <c>AutoMap</c>
/// using the invariant culture.
/// </summary>
public class CandidatoMap : ClassMap<CandidatoCSV>
{
    /// <summary>
    /// Builds the map through auto-mapping; no explicit member mappings are declared.
    /// </summary>
    public CandidatoMap() => AutoMap(CultureInfo.InvariantCulture);
}
}

View File

@@ -0,0 +1,14 @@
using System.Globalization;
using CsvHelper.Configuration;
using OpenCand.Parser.Models;
namespace OpenCand.ETL.Parser.CsvMappers
{
/// <summary>
/// CsvHelper class map for <see cref="RedeSocialCSV"/>, generated with <c>AutoMap</c>
/// using the invariant culture.
/// </summary>
/// <remarks>
/// The map must target <see cref="RedeSocialCSV"/>: it is registered right before the
/// social network records are read, and a map for another record type is not the one
/// consulted for those records.
/// </remarks>
public class RedeSocialMap : ClassMap<RedeSocialCSV>
{
    /// <summary>
    /// Builds the map through auto-mapping; no explicit member mappings are declared.
    /// </summary>
    public RedeSocialMap()
    {
        AutoMap(CultureInfo.InvariantCulture);
    }
}
}

View File

@@ -0,0 +1,58 @@
using CsvHelper.Configuration.Attributes;
namespace OpenCand.Parser.Models
{
/// <summary>
/// One row of a candidate assets CSV file. Columns are bound by header name through the
/// CsvHelper <c>[Name]</c> attributes.
/// </summary>
/// <remarks>
/// Date, time and monetary columns are kept as raw text; <see cref="ValorBemCandidato"/> is
/// converted to a decimal by the parser service. String members default to
/// <see cref="string.Empty"/> so none of them is left uninitialised under nullable
/// reference types.
/// </remarks>
public class BemCandidatoCSV
{
    [Name("DT_GERACAO")]
    public string DataGeracao { get; set; } = string.Empty;

    [Name("HH_GERACAO")]
    public string HoraGeracao { get; set; } = string.Empty;

    [Name("ANO_ELEICAO")]
    public int AnoEleicao { get; set; }

    [Name("CD_TIPO_ELEICAO")]
    public int CodigoTipoEleicao { get; set; }

    [Name("NM_TIPO_ELEICAO")]
    public string NomeTipoEleicao { get; set; } = string.Empty;

    [Name("CD_ELEICAO")]
    public int CodigoEleicao { get; set; }

    [Name("DS_ELEICAO")]
    public string DescricaoEleicao { get; set; } = string.Empty;

    [Name("DT_ELEICAO")]
    public string DataEleicao { get; set; } = string.Empty;

    [Name("SG_UF")]
    public string SiglaUF { get; set; } = string.Empty;

    [Name("SG_UE")]
    public string SiglaUE { get; set; } = string.Empty;

    [Name("NM_UE")]
    public string NomeUE { get; set; } = string.Empty;

    // Kept as text: it is used as a lookup key, never as a number.
    [Name("SQ_CANDIDATO")]
    public string SequencialCandidato { get; set; } = string.Empty;

    [Name("NR_ORDEM_BEM_CANDIDATO")]
    public int NumeroOrdemBemCandidato { get; set; }

    [Name("CD_TIPO_BEM_CANDIDATO")]
    public int CodigoTipoBemCandidato { get; set; }

    [Name("DS_TIPO_BEM_CANDIDATO")]
    public string DescricaoTipoBemCandidato { get; set; } = string.Empty;

    [Name("DS_BEM_CANDIDATO")]
    public string DescricaoBemCandidato { get; set; } = string.Empty;

    // Raw text of the value column; parsed later because the file does not use invariant number formatting.
    [Name("VR_BEM_CANDIDATO")]
    public string ValorBemCandidato { get; set; } = string.Empty;
}
}

View File

@@ -0,0 +1,95 @@
using System;
using CsvHelper.Configuration.Attributes;
namespace OpenCand.Parser.Models
{
/// <summary>
/// One row of a candidates CSV file. Columns are bound by header name through the
/// CsvHelper <c>[Name]</c> attributes.
/// </summary>
/// <remarks>
/// Dates are kept as raw text and parsed by the parser service. String members default to
/// <see cref="string.Empty"/> so none of them is left uninitialised under nullable
/// reference types. <see cref="CPFCandidato"/> is nullable because the parser service
/// clears it to <c>null</c> when the value is blank or too short.
/// </remarks>
public class CandidatoCSV
{
    [Name("DT_GERACAO")]
    public string DataGeracao { get; set; } = string.Empty;

    [Name("HH_GERACAO")]
    public string HoraGeracao { get; set; } = string.Empty;

    [Name("ANO_ELEICAO")]
    public int AnoEleicao { get; set; }

    [Name("CD_TIPO_ELEICAO")]
    public int CodigoTipoEleicao { get; set; }

    [Name("NM_TIPO_ELEICAO")]
    public string NomeTipoEleicao { get; set; } = string.Empty;

    [Name("NR_TURNO")]
    public int NumeroTurno { get; set; }

    [Name("CD_ELEICAO")]
    public int CodigoEleicao { get; set; }

    [Name("DS_ELEICAO")]
    public string DescricaoEleicao { get; set; } = string.Empty;

    [Name("DT_ELEICAO")]
    public string DataEleicao { get; set; } = string.Empty;

    [Name("TP_ABRANGENCIA")]
    public string TipoAbrangencia { get; set; } = string.Empty;

    [Name("SG_UF")]
    public string SiglaUF { get; set; } = string.Empty;

    [Name("SG_UE")]
    public string SiglaUE { get; set; } = string.Empty;

    [Name("NM_UE")]
    public string NomeUE { get; set; } = string.Empty;

    [Name("CD_CARGO")]
    public int CodigoCargo { get; set; }

    [Name("DS_CARGO")]
    public string DescricaoCargo { get; set; } = string.Empty;

    // Kept as text: it is used as a lookup key, never as a number.
    [Name("SQ_CANDIDATO")]
    public string SequencialCandidato { get; set; } = string.Empty;

    [Name("NR_CANDIDATO")]
    public string NumeroCandidato { get; set; } = string.Empty;

    [Name("NM_CANDIDATO")]
    public string NomeCandidato { get; set; } = string.Empty;

    [Name("NM_URNA_CANDIDATO")]
    public string NomeUrnaCandidato { get; set; } = string.Empty;

    [Name("NM_SOCIAL_CANDIDATO")]
    public string NomeSocialCandidato { get; set; } = string.Empty;

    // Nullable on purpose: the parser service assigns null for blank/placeholder values.
    [Name("NR_CPF_CANDIDATO")]
    public string? CPFCandidato { get; set; } = string.Empty;

    // Two header names are declared for this column (DS_EMAIL and NM_EMAIL).
    [Name("DS_EMAIL", "NM_EMAIL")]
    public string Email { get; set; } = string.Empty;

    [Name("SG_UF_NASCIMENTO")]
    public string SiglaUFNascimento { get; set; } = string.Empty;

    // Raw text; the parser service expects the dd/MM/yyyy format.
    [Name("DT_NASCIMENTO")]
    public string DataNascimento { get; set; } = string.Empty;

    [Name("DS_GENERO")]
    public string Genero { get; set; } = string.Empty;

    [Name("DS_OCUPACAO")]
    public string Ocupacao { get; set; } = string.Empty;

    [Name("DS_ESTADO_CIVIL")]
    public string EstadoCivil { get; set; } = string.Empty;

    [Name("DS_GRAU_INSTRUCAO")]
    public string GrauInstrucao { get; set; } = string.Empty;

    [Name("DS_SIT_TOT_TURNO")]
    public string SituacaoTurno { get; set; } = string.Empty;
}
}

View File

@@ -0,0 +1,19 @@
using CsvHelper.Configuration.Attributes;
namespace OpenCand.Parser.Models
{
/// <summary>
/// One row of a social network CSV file. Columns are bound by header name through the
/// CsvHelper <c>[Name]</c> attributes.
/// </summary>
/// <remarks>
/// String members default to <see cref="string.Empty"/> so none of them is left
/// uninitialised under nullable reference types.
/// </remarks>
public class RedeSocialCSV
{
    // Despite the property name, this is bound to the AA_ELEICAO column and is consumed
    // as the election year by the parser service.
    [Name("AA_ELEICAO")]
    public int DataEleicao { get; set; }

    [Name("SG_UF")]
    public string SiglaUF { get; set; } = string.Empty;

    // Kept as text: it is used as a lookup key, never as a number.
    [Name("SQ_CANDIDATO")]
    public string SequencialCandidato { get; set; } = string.Empty;

    [Name("DS_URL")]
    public string Url { get; set; } = string.Empty;
}
}

View File

@@ -0,0 +1,118 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using OpenCand.Config;
using OpenCand.Parser.Services;
namespace OpenCand.Parser
{
/// <summary>
/// Drives a full import: walks the candidates, candidate assets and social network folders
/// (in that order) under the configured base path and hands every CSV file to
/// <see cref="CsvParserService"/>.
/// </summary>
public class ParserManager
{
    // Prefix the CSV fixer puts on the repaired copy it writes next to the source file.
    private const string FixedFilePrefix = "fix_";

    private readonly CsvParserService csvParserService;
    private readonly ILogger<ParserManager> logger;
    private readonly CsvSettings csvSettings;
    private readonly IConfiguration configuration;

    /// <summary>Creates the manager with its parser, folder settings, logger and configuration.</summary>
    public ParserManager(
        CsvParserService csvParserService,
        IOptions<CsvSettings> csvSettings,
        ILogger<ParserManager> logger,
        IConfiguration configuration)
    {
        this.csvParserService = csvParserService;
        this.logger = logger;
        this.csvSettings = csvSettings.Value;
        this.configuration = configuration;
    }

    /// <summary>
    /// Parses every <c>*.csv</c> file of the three data sets. Files whose name starts with
    /// <c>fix_</c> are skipped because they are repaired copies produced by a previous run.
    /// A missing folder is logged as a warning and does not stop the run.
    /// </summary>
    /// <remarks>
    /// Returns without doing anything (after logging an error) when "BasePath" is not
    /// configured. Any exception raised while parsing is logged and rethrown.
    /// </remarks>
    public async Task ParseFullDataAsync()
    {
        logger.LogInformation("ParseFullDataAsync - Starting parsing");

        // The root folder comes from the "BasePath" configuration key.
        var basePath = configuration.GetValue<string>("BasePath");
        if (string.IsNullOrEmpty(basePath))
        {
            logger.LogError("ParseFullDataAsync - BasePath is not configured in appsettings.json or CsvSettings.SampleFolder");
            return;
        }

        logger.LogInformation("ParseFullDataAsync - Processing will happen with BasePath: {BasePath}", basePath);

        try
        {
            var candidatosDirectory = Path.Combine(basePath, csvSettings.CandidatosFolder);
            var bensCandidatosDirectory = Path.Combine(basePath, csvSettings.BensCandidatosFolder);
            var redesSociaisDirectory = Path.Combine(basePath, csvSettings.RedesSociaisFolder);

            // Candidates go first: the other two data sets look candidates up by their sequence number.
            if (Directory.Exists(candidatosDirectory))
            {
                foreach (var filePath in Directory.GetFiles(candidatosDirectory, "*.csv"))
                {
                    if (IsFixedFile(filePath))
                    {
                        logger.LogInformation("ParseFullDataAsync - Skipping already fixed file: {FilePath}", filePath);
                        continue;
                    }

                    logger.LogInformation("ParseFullDataAsync - Parsing candidatos data from {FilePath}", filePath);
                    await csvParserService.ParseCandidatosAsync(filePath);
                }
            }
            else
            {
                logger.LogWarning("ParseFullDataAsync - 'Candidatos' directory not found at {Directory}", candidatosDirectory);
            }

            if (Directory.Exists(bensCandidatosDirectory))
            {
                foreach (var filePath in Directory.GetFiles(bensCandidatosDirectory, "*.csv"))
                {
                    if (IsFixedFile(filePath))
                    {
                        logger.LogInformation("ParseFullDataAsync - Skipping already fixed file: {FilePath}", filePath);
                        continue;
                    }

                    logger.LogInformation("ParseFullDataAsync - Parsing bens candidatos data from {FilePath}", filePath);
                    await csvParserService.ParseBensCandidatosAsync(filePath);
                }
            }
            else
            {
                logger.LogWarning("ParseFullDataAsync - 'Bens candidatos' directory not found at {Directory}", bensCandidatosDirectory);
            }

            if (Directory.Exists(redesSociaisDirectory))
            {
                foreach (var filePath in Directory.GetFiles(redesSociaisDirectory, "*.csv"))
                {
                    if (IsFixedFile(filePath))
                    {
                        logger.LogInformation("ParseFullDataAsync - Skipping already fixed file: {FilePath}", filePath);
                        continue;
                    }

                    logger.LogInformation("ParseFullDataAsync - Parsing redes sociais data from {FilePath}", filePath);
                    await csvParserService.ParseRedeSocialAsync(filePath);
                }
            }
            else
            {
                logger.LogWarning("ParseFullDataAsync - 'Redes sociais' directory not found at {Directory}", redesSociaisDirectory);
            }

            logger.LogInformation("ParseFullDataAsync - Full data parsing completed!");
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "ParseFullDataAsync - Error parsing full data set");
            throw;
        }
    }

    /// <summary>
    /// Tells whether <paramref name="filePath"/> names a repaired copy. Only the file name is
    /// inspected, so a directory that happens to contain "fix_" does not hide its files.
    /// </summary>
    private static bool IsFixedFile(string filePath) =>
        Path.GetFileName(filePath).StartsWith(FixedFilePrefix, StringComparison.Ordinal);
}
}

View File

@@ -0,0 +1,119 @@
using System.Text;
using Microsoft.Extensions.Logging;
using OpenCand.Repository;
namespace OpenCand.Parser.Services
{
/// <summary>
/// Repairs CSV files whose records were broken across several physical lines and writes
/// the result as a UTF-8 copy named <c>fix_&lt;original name&gt;</c> in the same folder.
/// </summary>
public class CsvFixerService
{
    // NOTE(review): the logger category is CsvParserService, not CsvFixerService. It is left
    // as is because changing it would change the public constructor signature.
    private readonly ILogger<CsvParserService> logger;

    /// <summary>Creates the fixer with the logger it reports to.</summary>
    public CsvFixerService(
        ILogger<CsvParserService> logger)
    {
        this.logger = logger;
    }

    /// <summary>
    /// Reads <paramref name="filePath"/> with code page 1252, joins physical lines until each
    /// record has as many <c>;</c>-separated columns as the header line, and writes the
    /// records to the <c>fix_</c> copy.
    /// </summary>
    /// <param name="filePath">Path of the source CSV file.</param>
    /// <returns>
    /// The path of the repaired copy, or <see cref="string.Empty"/> when the file is missing,
    /// empty, cannot be repaired (a record with too many columns, or the file ends in the
    /// middle of a record) or any I/O error occurs. Failures are logged, never thrown.
    /// </returns>
    /// <remarks>
    /// Columns are counted with a plain split on <c>;</c>, so a semicolon inside a quoted
    /// field counts as a separator. Code page 1252 must have been made available through
    /// the code pages encoding provider before this method is called.
    /// </remarks>
    public string FixCsvFile(string filePath)
    {
        if (!File.Exists(filePath))
        {
            logger.LogError("FixCsvFile - The file at '{FilePath}' does not exist", filePath);
            return string.Empty;
        }

        var filename = Path.GetFileName(filePath);
        // A bare file name has no directory part; the copy then goes to the current directory.
        var directory = Path.GetDirectoryName(filePath) ?? string.Empty;

        if (string.IsNullOrEmpty(filename))
        {
            logger.LogError("FixCsvFile - The file path '{FilePath}' is invalid", filePath);
            return string.Empty;
        }

        // Fixed file will have the same name but with "fix_" prefix
        var newFilePath = Path.Combine(directory, $"fix_{filename}");
        if (File.Exists(newFilePath))
        {
            logger.LogWarning("FixCsvFile - A fixed file already exists at '{NewFilePath}'. It will be overwritten.", newFilePath);
        }

        logger.LogInformation("FixCsvFile - Starting to fix CSV file at '{FilePath}'", filePath);

        try
        {
            var lines = File.ReadAllLines(filePath, encoding: Encoding.GetEncoding(1252));
            if (lines.Length == 0)
            {
                logger.LogError("FixCsvFile - The file at '{FilePath}' is empty", filePath);
                return string.Empty;
            }

            // Split never returns an empty array, so a missing header shows up as a blank first line.
            if (string.IsNullOrWhiteSpace(lines[0]))
            {
                logger.LogError("FixCsvFile - The first line of the file at '{FilePath}' does not contain any headers", filePath);
                return string.Empty;
            }

            var headerCount = lines[0].Split(';').Length;
            logger.LogInformation("FixCsvFile - Detected {HeaderCount} headers in the CSV file", headerCount);

            var newLines = new List<string>(lines.Length);

            for (int i = 0; i < lines.Length;)
            {
                var line = lines[i];
                var columns = line.Split(';');
                var lineJump = 1;

                // A blank line cannot start a record. Skipping it gives the same output as
                // merging it with the next line, and keeps a trailing blank line from
                // failing the whole file with "reached the end of the file".
                if (columns.Length != headerCount && string.IsNullOrWhiteSpace(line))
                {
                    i++;
                    continue;
                }

                while (columns.Length != headerCount)
                {
                    if (columns.Length > headerCount)
                    {
                        // Too many columns cannot be repaired by joining lines => needs manual intervention
                        logger.LogCritical("FixCsvFile - Line {LineNumber} has {ColumnCount} columns, expected {HeaderCount}. Halting process.",
                            i + 1, columns.Length, headerCount);
                        return string.Empty;
                    }

                    logger.LogWarning("FixCsvFile - Line {LineNumber} has {ColumnCount} columns, expected {HeaderCount}. Attempting to fix [i = {LineJump}]...",
                        i + 1, columns.Length, headerCount, lineJump);

                    // Too few columns: the record most likely contained a line break, so the
                    // following physical line belongs to it.
                    if (i + lineJump >= lines.Length)
                    {
                        logger.LogCritical("FixCsvFile - Reached the end of the file while trying to fix line {LineNumber}. Cannot continue.", i + 1);
                        return string.Empty;
                    }

                    // Join the next physical line (the line break itself is dropped) and re-check.
                    line += lines[i + lineJump];
                    columns = line.Split(';');
                    lineJump++;
                }

                newLines.Add(line);
                // Skip every physical line that was consumed by this record.
                i += lineJump;
            }

            File.WriteAllLines(newFilePath, newLines, Encoding.UTF8);
            logger.LogInformation("FixCsvFile - Successfully fixed CSV file at {NewFilePath}", newFilePath);
            return newFilePath;
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "FixCsvFile - Error fixing CSV file at {FilePath}", filePath);
            return string.Empty;
        }
    }
}
}

View File

@@ -0,0 +1,268 @@
using System.Globalization;
using CsvHelper;
using CsvHelper.Configuration;
using Microsoft.Extensions.Logging;
using OpenCand.Core.Models;
using OpenCand.ETL.Parser.CsvMappers;
using OpenCand.Parser.CsvMappers;
using OpenCand.Parser.Models;
using OpenCand.Services;
namespace OpenCand.Parser.Services
{
public class CsvParserService
{
private readonly ILogger<CsvParserService> logger;
private readonly CandidatoService candidatoService;
private readonly BemCandidatoService bemCandidatoService;
private readonly RedeSocialService redeSocialService;
private readonly CsvFixerService csvFixerService;
private readonly CsvConfiguration parserConfig;
/// <summary>
/// Creates the parser with the services it persists records through and builds the
/// shared CsvHelper configuration: <c>;</c> delimiter, header row expected, trimmed
/// fields, missing fields ignored and case-insensitive header matching.
/// </summary>
public CsvParserService(
    ILogger<CsvParserService> logger,
    CandidatoService candidatoService,
    BemCandidatoService bemCandidatoService,
    RedeSocialService redeSocialService,
    CsvFixerService csvFixerService)
{
    this.logger = logger;
    this.candidatoService = candidatoService;
    this.bemCandidatoService = bemCandidatoService;
    this.redeSocialService = redeSocialService;
    this.csvFixerService = csvFixerService;

    parserConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Delimiter = ";",
        HasHeaderRecord = true,
        // Invariant lower-casing: header matching must not depend on the machine's culture.
        PrepareHeaderForMatch = args => args.Header.ToLowerInvariant(),
        MissingFieldFound = null,
        TrimOptions = TrimOptions.Trim,
        Encoding = System.Text.Encoding.UTF8
    };
}
/// <summary>
/// Repairs the candidates CSV at <paramref name="filePath"/>, then reads the repaired copy
/// and stores one candidate (with one election mapping) per record.
/// </summary>
/// <param name="filePath">Path of the original candidates CSV file.</param>
/// <exception cref="InvalidOperationException">The file could not be repaired.</exception>
/// <remarks>
/// Records are processed concurrently. A failure on a single record is logged and does not
/// stop the file; a failure reading the file itself is logged and rethrown.
/// </remarks>
public async Task ParseCandidatosAsync(string filePath)
{
    logger.LogInformation("ParseCandidatosAsync - Starting to parse 'candidatos' from '{FilePath}'", filePath);

    // Keep the original path: the fixer returns an empty string on failure and the
    // error below has to name the file that could not be repaired.
    var fixedFilePath = csvFixerService.FixCsvFile(filePath);
    if (string.IsNullOrEmpty(fixedFilePath))
    {
        logger.LogError("ParseCandidatosAsync - Failed to fix CSV file at '{FilePath}'", filePath);
        throw new InvalidOperationException($"Failed to fix CSV file at '{filePath}'");
    }

    try
    {
        using var reader = new StreamReader(fixedFilePath);
        using var csv = new CsvReader(reader, parserConfig);

        // NOTE(review): the candidate service looks a candidate up and then inserts it in
        // separate statements. With many records in flight, two rows of the same person
        // may both miss the lookup - confirm whether that can create duplicates.
        var po = new ParallelOptions
        {
            MaxDegreeOfParallelism = 100
        };

        csv.Context.RegisterClassMap<CandidatoMap>();
        var records = csv.GetRecords<CandidatoCSV>();

        await Parallel.ForEachAsync(records, po, async (record, ct) =>
        {
            try
            {
                if (string.IsNullOrWhiteSpace(record.CPFCandidato) || record.CPFCandidato.Length <= 3)
                {
                    record.CPFCandidato = null; // Blank or placeholder CPF is stored as null
                }

                if (record.NomeCandidato == "NÃO DIVULGÁVEL" ||
                    string.IsNullOrEmpty(record.NomeCandidato) ||
                    record.NomeCandidato == "#NULO")
                {
                    logger.LogCritical("ParseCandidatosAsync - Candidate with id {CandidatoId} with invalid name, skipping...", record.SequencialCandidato);
                    return; // Skip candidates with invalid name
                }

                var candidato = new Candidato
                {
                    Cpf = record.CPFCandidato,
                    SqCandidato = record.SequencialCandidato,
                    Nome = record.NomeCandidato,
                    // Only keep values that look like an address; a missing value must not throw.
                    Email = record.Email?.Contains('@') == true ? record.Email : null,
                    Sexo = record.Genero,
                    EstadoCivil = record.EstadoCivil,
                    Escolaridade = record.GrauInstrucao,
                    Ocupacao = record.Ocupacao,
                    Eleicoes = new List<CandidatoMapping>()
                    {
                        new CandidatoMapping
                        {
                            Cpf = record.CPFCandidato,
                            Nome = record.NomeCandidato,
                            SqCandidato = record.SequencialCandidato,
                            Ano = record.AnoEleicao,
                            TipoEleicao = record.TipoAbrangencia,
                            NomeUE = record.NomeUE,
                            SiglaUF = record.SiglaUF,
                            Cargo = record.DescricaoCargo,
                            NrCandidato = record.NumeroCandidato,
                            Resultado = record.SituacaoTurno,
                        }
                    }
                };

                if (!string.IsNullOrEmpty(record.DataNascimento) &&
                    record.DataNascimento != "#NULO")
                {
                    if (DateTime.TryParseExact(record.DataNascimento, "dd/MM/yyyy",
                        CultureInfo.InvariantCulture, DateTimeStyles.None, out var dataNascimento))
                    {
                        // The calendar date is kept exactly as written and only tagged as UTC
                        // (no time-zone shift), so it can be stored as a timestamp with time zone.
                        candidato.DataNascimento = DateTime.SpecifyKind(dataNascimento, DateTimeKind.Utc);
                    }
                }
                else
                {
                    candidato.DataNascimento = null; // Blank or placeholder date
                }

                await candidatoService.AddCandidatoAsync(candidato);
            }
            catch (Exception ex)
            {
                logger.LogError(ex, "ParseCandidatosAsync - Error processing candidate with id {CandidatoId}", record.SequencialCandidato);
            }
        });

        logger.LogInformation("ParseCandidatosAsync - Finished parsing candidatos from {FilePath}", fixedFilePath);
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "ParseCandidatosAsync - Error parsing candidatos file {FilePath}", fixedFilePath);
        throw;
    }
}
/// <summary>
/// Repairs the candidate assets CSV at <paramref name="filePath"/>, then reads the repaired
/// copy and stores one asset per record, in file order.
/// </summary>
/// <param name="filePath">Path of the original candidate assets CSV file.</param>
/// <exception cref="InvalidOperationException">The file could not be repaired.</exception>
/// <remarks>
/// A failure on a single record is logged and does not stop the file; a failure reading
/// the file itself is logged and rethrown.
/// </remarks>
public async Task ParseBensCandidatosAsync(string filePath)
{
    logger.LogInformation("ParseBensCandidatosAsync - Starting to parse bens candidatos from '{FilePath}'", filePath);

    // Keep the original path: the fixer returns an empty string on failure and the
    // error below has to name the file that could not be repaired.
    var fixedFilePath = csvFixerService.FixCsvFile(filePath);
    if (string.IsNullOrEmpty(fixedFilePath))
    {
        logger.LogError("ParseBensCandidatosAsync - Failed to fix CSV file at '{FilePath}'", filePath);
        throw new InvalidOperationException($"Failed to fix CSV file at '{filePath}'");
    }

    try
    {
        using var reader = new StreamReader(fixedFilePath);
        using var csv = new CsvReader(reader, parserConfig);

        csv.Context.RegisterClassMap<BemCandidatoMap>();
        var records = csv.GetRecords<BemCandidatoCSV>();

        foreach (var record in records)
        {
            try
            {
                // The value column uses '.' for thousands and ',' for decimals; normalise it
                // before parsing. Anything unparsable is stored as null.
                decimal? valor = null;
                if (!string.IsNullOrEmpty(record.ValorBemCandidato))
                {
                    string normalizedValue = record.ValorBemCandidato.Replace(".", "").Replace(",", ".");
                    if (decimal.TryParse(normalizedValue, NumberStyles.Any, CultureInfo.InvariantCulture, out var parsedValue))
                    {
                        valor = parsedValue;
                    }
                }

                var bemCandidato = new BemCandidato
                {
                    SqCandidato = record.SequencialCandidato,
                    Ano = record.AnoEleicao,
                    SiglaUF = record.SiglaUF,
                    NomeUE = record.NomeUE,
                    OrdemBem = record.NumeroOrdemBemCandidato,
                    TipoBem = record.DescricaoTipoBemCandidato,
                    Descricao = record.DescricaoBemCandidato,
                    Valor = valor
                };

                await bemCandidatoService.AddBemCandidatoAsync(bemCandidato);
            }
            catch (Exception ex)
            {
                logger.LogError(ex, "ParseBensCandidatosAsync - Error processing bem candidato with id {CandidatoId} and ordem {OrdemBem}",
                    record.SequencialCandidato, record.NumeroOrdemBemCandidato);
            }
        }

        logger.LogInformation("ParseBensCandidatosAsync - Finished parsing bens candidatos from {FilePath}", fixedFilePath);
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "ParseBensCandidatosAsync - Error parsing bens candidatos file {FilePath}", fixedFilePath);
        throw;
    }
}
/// <summary>
/// Repairs the social network CSV at <paramref name="filePath"/>, then reads the repaired
/// copy and stores one link per record, in file order.
/// </summary>
/// <param name="filePath">Path of the original social network CSV file.</param>
/// <exception cref="InvalidOperationException">The file could not be repaired.</exception>
/// <remarks>
/// A failure on a single record is logged and does not stop the file; a failure reading
/// the file itself is logged and rethrown.
/// </remarks>
public async Task ParseRedeSocialAsync(string filePath)
{
    logger.LogInformation("ParseRedeSocialAsync - Starting to parse redes sociais from '{FilePath}'", filePath);

    // Keep the original path: the fixer returns an empty string on failure and the
    // error below has to name the file that could not be repaired.
    var fixedFilePath = csvFixerService.FixCsvFile(filePath);
    if (string.IsNullOrEmpty(fixedFilePath))
    {
        logger.LogError("ParseRedeSocialAsync - Failed to fix CSV file at '{FilePath}'", filePath);
        throw new InvalidOperationException($"Failed to fix CSV file at '{filePath}'");
    }

    try
    {
        using var reader = new StreamReader(fixedFilePath);
        using var csv = new CsvReader(reader, parserConfig);

        csv.Context.RegisterClassMap<RedeSocialMap>();
        var records = csv.GetRecords<RedeSocialCSV>();

        foreach (var record in records)
        {
            try
            {
                var redeSocial = new RedeSocial
                {
                    SqCandidato = record.SequencialCandidato,
                    Ano = record.DataEleicao,
                    SiglaUF = record.SiglaUF,
                    Link = record.Url,
                    // Filled in by the social network service from the link.
                    Rede = string.Empty
                };

                await redeSocialService.AddRedeSocialAsync(redeSocial);
            }
            catch (Exception ex)
            {
                logger.LogError(ex, "ParseRedeSocialAsync - Error processing redes sociais with id {SequencialCandidato} and link {Url}",
                    record.SequencialCandidato, record.Url);
            }
        }

        logger.LogInformation("ParseRedeSocialAsync - Finished parsing redes sociais from {FilePath}", fixedFilePath);
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "ParseRedeSocialAsync - Error parsing redes sociais file {FilePath}", fixedFilePath);
        throw;
    }
}
}
}

70
OpenCand.ETL/Program.cs Normal file
View File

@@ -0,0 +1,70 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using OpenCand.Config;
using OpenCand.Parser;
using OpenCand.Parser.Services;
using OpenCand.Repository;
using OpenCand.Services;
namespace OpenCand
{
/// <summary>
/// Entry point of the ETL: builds the generic host, resolves <see cref="ParserManager"/>
/// and runs a full import.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs the import once. Any failure is logged and reported to the caller through a
    /// non-zero process exit code.
    /// </summary>
    static async Task Main(string[] args)
    {
        // Disposing the host disposes the service provider, which lets the logging
        // providers flush their output before the process exits.
        using var host = CreateHostBuilder(args).Build();

        using (var scope = host.Services.CreateScope())
        {
            var services = scope.ServiceProvider;
            var logger = services.GetRequiredService<ILogger<Program>>();

            try
            {
                logger.LogInformation("Initializing database");
                // make a test connection to the database

                logger.LogInformation("Starting data parsing");
                var parserManager = services.GetRequiredService<ParserManager>();
                await parserManager.ParseFullDataAsync();
                logger.LogInformation("Data parsing completed successfully!");
            }
            catch (Exception ex)
            {
                logger.LogError(ex, "An error occurred during application startup");
                // Let schedulers and scripts detect the failed run.
                Environment.ExitCode = 1;
            }
        }
    }

    /// <summary>
    /// Builds the host: JSON settings (base file plus the optional per-environment file),
    /// environment variables and command line as configuration sources, the "CsvSettings"
    /// options binding and the transient registrations of parsers, services and repositories.
    /// </summary>
    public static IHostBuilder CreateHostBuilder(string[] args) =>
        Host.CreateDefaultBuilder(args)
            .ConfigureAppConfiguration((hostingContext, config) =>
            {
                config.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true);
                config.AddJsonFile($"appsettings.{hostingContext.HostingEnvironment.EnvironmentName}.json", optional: true);
                config.AddEnvironmentVariables();
                config.AddCommandLine(args);

                // Makes legacy code pages (the CSV fixer reads code page 1252) available to Encoding.GetEncoding.
                System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
            })
            .ConfigureServices((hostContext, services) =>
            {
                // Configuration
                services.Configure<CsvSettings>(hostContext.Configuration.GetSection("CsvSettings"));

                // Services
                services.AddTransient<CsvParserService>();
                services.AddTransient<ParserManager>();
                services.AddTransient<CandidatoService>();
                services.AddTransient<BemCandidatoService>();
                services.AddTransient<RedeSocialService>();
                services.AddTransient<CandidatoRepository>();
                services.AddTransient<BemCandidatoRepository>();
                services.AddTransient<RedeSocialRepository>();
                services.AddTransient<CsvFixerService>();
            });
}
}

View File

@@ -0,0 +1,10 @@
{
"profiles": {
"OpenCand": {
"commandName": "Project",
"environmentVariables": {
"DOTNET_ENVIRONMENT": "Development"
}
}
}
}

View File

@@ -0,0 +1,17 @@
using Microsoft.Extensions.Configuration;
using Npgsql;
namespace OpenCand.Repository
{
/// <summary>
/// Base class of the repositories: reads the database connection string from the
/// "DatabaseSettings:ConnectionString" configuration key and exposes it to derived classes.
/// </summary>
public abstract class BaseRepository
{
    /// <summary>Connection string read from configuration at construction time.</summary>
    protected string ConnectionString { get; private set; }

    // Never assigned in this class; the repositories in this project open their own
    // connection per call instead.
    protected NpgsqlConnection? Connection { get; private set; }

    /// <summary>Reads the connection string from <paramref name="configuration"/>.</summary>
    /// <param name="configuration">Application configuration.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="configuration"/> is null, or it holds no
    /// "DatabaseSettings:ConnectionString" value.
    /// </exception>
    protected BaseRepository(IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(configuration);

        // The two-argument overload is required here: the single-string overload treats
        // its argument as the parameter name, not as the message.
        ConnectionString = configuration["DatabaseSettings:ConnectionString"] ??
            throw new ArgumentNullException(nameof(configuration), "Connection string not found in configuration");
    }
}
}

View File

@@ -0,0 +1,37 @@
using Dapper;
using Microsoft.Extensions.Configuration;
using Npgsql;
using OpenCand.Core.Models;
namespace OpenCand.Repository
{
/// <summary>
/// Persists candidate assets in the <c>bem_candidato</c> table.
/// </summary>
public class BemCandidatoRepository : BaseRepository
{
    /// <summary>Creates the repository; the connection string is read by the base class.</summary>
    public BemCandidatoRepository(IConfiguration configuration) : base(configuration)
    {
    }

    /// <summary>
    /// Inserts the asset, or updates its type, description and value when a row with the
    /// same candidate id, year and order already exists.
    /// </summary>
    /// <param name="bemCandidato">Asset to store; its candidate id must already be set.</param>
    public async Task AddBemCandidatoAsync(BemCandidato bemCandidato)
    {
        const string sql = @"
INSERT INTO bem_candidato (idcandidato, ano, ordembem, tipobem, descricao, valor)
VALUES (@idcandidato, @ano, @ordembem, @tipobem, @descricao, @valor)
ON CONFLICT (idcandidato, ano, ordembem) DO UPDATE SET
tipobem = EXCLUDED.tipobem,
descricao = EXCLUDED.descricao,
valor = EXCLUDED.valor;";

        var parameters = new
        {
            idcandidato = bemCandidato.IdCandidato,
            ano = bemCandidato.Ano,
            ordembem = bemCandidato.OrdemBem,
            tipobem = bemCandidato.TipoBem,
            descricao = bemCandidato.Descricao,
            valor = bemCandidato.Valor
        };

        using var connection = new NpgsqlConnection(ConnectionString);
        await connection.ExecuteAsync(sql, parameters);
    }
}
}

View File

@@ -0,0 +1,122 @@
using Dapper;
using Microsoft.Extensions.Configuration;
using Npgsql;
using OpenCand.Core.Models;
namespace OpenCand.Repository
{
/// <summary>
/// Data access for the <c>candidato</c> and <c>candidato_mapping</c> tables. Every method
/// opens its own connection and disposes it before returning.
/// </summary>
public class CandidatoRepository : BaseRepository
{
    /// <summary>Creates the repository; the connection string is read by the base class.</summary>
    public CandidatoRepository(IConfiguration configuration) : base(configuration)
    {
    }

    /// <summary>
    /// Inserts the candidate, or overwrites every non-key column when a row with the same
    /// candidate id already exists.
    /// </summary>
    public async Task AddCandidatoAsync(Candidato candidato)
    {
        const string sql = @"
INSERT INTO candidato (idcandidato, cpf, nome, datanascimento, email, sexo, estadocivil, escolaridade, ocupacao)
VALUES (@idcandidato, @cpf, @nome, @datanascimento, @email, @sexo, @estadocivil, @escolaridade, @ocupacao)
ON CONFLICT (idcandidato) DO UPDATE SET
cpf = EXCLUDED.cpf,
nome = EXCLUDED.nome,
datanascimento = EXCLUDED.datanascimento,
email = EXCLUDED.email,
sexo = EXCLUDED.sexo,
estadocivil = EXCLUDED.estadocivil,
escolaridade = EXCLUDED.escolaridade,
ocupacao = EXCLUDED.ocupacao;";

        var parameters = new
        {
            idcandidato = candidato.IdCandidato,
            cpf = candidato.Cpf,
            nome = candidato.Nome,
            datanascimento = candidato.DataNascimento,
            email = candidato.Email,
            sexo = candidato.Sexo,
            estadocivil = candidato.EstadoCivil,
            escolaridade = candidato.Escolaridade,
            ocupacao = candidato.Ocupacao
        };

        using var connection = new NpgsqlConnection(ConnectionString);
        await connection.ExecuteAsync(sql, parameters);
    }

    /// <summary>
    /// Inserts one election mapping row. The statement has no conflict clause, so the
    /// caller is responsible for not inserting the same mapping twice.
    /// </summary>
    public async Task AddCandidatoMappingAsync(CandidatoMapping candidatoMapping)
    {
        const string sql = @"
INSERT INTO candidato_mapping (idcandidato, cpf, nome, sqcandidato, ano, tipoeleicao, siglauf, nomeue, cargo, nrcandidato, resultado)
VALUES (@idcandidato, @cpf, @nome, @sqcandidato, @ano, @tipoeleicao, @siglauf, @nomeue, @cargo, @nrcandidato, @resultado);";

        var parameters = new
        {
            idcandidato = candidatoMapping.IdCandidato,
            cpf = candidatoMapping.Cpf,
            nome = candidatoMapping.Nome,
            sqcandidato = candidatoMapping.SqCandidato,
            ano = candidatoMapping.Ano,
            tipoeleicao = candidatoMapping.TipoEleicao,
            siglauf = candidatoMapping.SiglaUF,
            nomeue = candidatoMapping.NomeUE,
            nrcandidato = candidatoMapping.NrCandidato,
            cargo = candidatoMapping.Cargo,
            resultado = candidatoMapping.Resultado
        };

        using var connection = new NpgsqlConnection(ConnectionString);
        await connection.ExecuteAsync(sql, parameters);
    }

    /// <summary>Returns every mapping row whose CPF equals <paramref name="cpf"/>.</summary>
    public async Task<List<CandidatoMapping>?> GetCandidatoMappingByCpf(string cpf)
    {
        const string sql = @"
SELECT idcandidato, cpf, nome, sqcandidato, ano, tipoeleicao, siglauf, nomeue, cargo, nrcandidato, resultado
FROM candidato_mapping
WHERE cpf = @cpf";

        using var connection = new NpgsqlConnection(ConnectionString);
        var rows = await connection.QueryAsync<CandidatoMapping>(sql, new { cpf });
        return rows.AsList();
    }

    /// <summary>Returns every mapping row whose name equals <paramref name="nome"/>.</summary>
    public async Task<List<CandidatoMapping>?> GetCandidatoMappingByNome(string nome)
    {
        const string sql = @"
SELECT idcandidato, cpf, nome, sqcandidato, ano, tipoeleicao, siglauf, nomeue, cargo, nrcandidato, resultado
FROM candidato_mapping
WHERE nome = @nome";

        using var connection = new NpgsqlConnection(ConnectionString);
        var rows = await connection.QueryAsync<CandidatoMapping>(sql, new { nome });
        return rows.AsList();
    }

    /// <summary>
    /// Finds the first mapping for the given sequence number, year, state and electoral
    /// unit name. Only the candidate id column is selected, so only that member of the
    /// result is populated. Returns <c>null</c> when nothing matches.
    /// </summary>
    public async Task<CandidatoMapping?> GetIdCandidatoBySqCandidato(string sqCandidato, int ano, string siglauf, string nomeue)
    {
        const string sql = @"
SELECT idcandidato
FROM candidato_mapping
WHERE sqcandidato = @sqCandidato AND
ano = @ano AND
siglauf = @siglauf AND
nomeue = @nomeue";

        using var connection = new NpgsqlConnection(ConnectionString);
        var filter = new { sqCandidato, ano, siglauf, nomeue };
        return await connection.QueryFirstOrDefaultAsync<CandidatoMapping>(sql, filter);
    }

    /// <summary>
    /// Finds the first mapping for the given sequence number, year and state. Only the
    /// candidate id column is selected, so only that member of the result is populated.
    /// Returns <c>null</c> when nothing matches.
    /// </summary>
    public async Task<CandidatoMapping?> GetIdCandidatoBySqCandidato(string sqCandidato, int ano, string siglauf)
    {
        const string sql = @"
SELECT idcandidato
FROM candidato_mapping
WHERE sqcandidato = @sqCandidato AND
ano = @ano AND
siglauf = @siglauf";

        using var connection = new NpgsqlConnection(ConnectionString);
        var filter = new { sqCandidato, ano, siglauf };
        return await connection.QueryFirstOrDefaultAsync<CandidatoMapping>(sql, filter);
    }
}
}

View File

@@ -0,0 +1,34 @@
using Dapper;
using Microsoft.Extensions.Configuration;
using Npgsql;
using OpenCand.Core.Models;
namespace OpenCand.Repository
{
/// <summary>
/// Persists candidate social network links in the <c>rede_social</c> table.
/// </summary>
public class RedeSocialRepository : BaseRepository
{
    /// <summary>Creates the repository; the connection string is read by the base class.</summary>
    public RedeSocialRepository(IConfiguration configuration) : base(configuration)
    {
    }

    /// <summary>
    /// Inserts the link, or replaces the stored link when a row with the same candidate id,
    /// network, state and year already exists.
    /// </summary>
    /// <param name="redeSocial">Link to store; its candidate id and network must already be set.</param>
    public async Task AddRedeSocialAsync(RedeSocial redeSocial)
    {
        const string sql = @"
INSERT INTO rede_social (idcandidato, rede, siglauf, ano, link)
VALUES (@idcandidato, @rede, @siglauf, @ano, @link)
ON CONFLICT (idcandidato, rede, siglauf, ano) DO UPDATE SET
link = EXCLUDED.link;";

        var parameters = new
        {
            idcandidato = redeSocial.IdCandidato,
            rede = redeSocial.Rede,
            siglauf = redeSocial.SiglaUF,
            ano = redeSocial.Ano,
            link = redeSocial.Link
        };

        using var connection = new NpgsqlConnection(ConnectionString);
        await connection.ExecuteAsync(sql, parameters);
    }
}
}

View File

@@ -0,0 +1,37 @@
using OpenCand.Core.Models;
using OpenCand.Repository;
namespace OpenCand.Services
{
/// <summary>
/// Resolves the candidate an asset belongs to and stores the asset.
/// </summary>
public class BemCandidatoService
{
    private readonly CandidatoRepository candidatoRepository;
    private readonly BemCandidatoRepository bemCandidatoRepository;

    /// <summary>Creates the service with the repositories it reads from and writes to.</summary>
    public BemCandidatoService(CandidatoRepository candidatoRepository, BemCandidatoRepository bemCandidatoRepository)
    {
        this.bemCandidatoRepository = bemCandidatoRepository;
        this.candidatoRepository = candidatoRepository;
    }

    /// <summary>
    /// Looks the candidate up by sequence number, year, state and electoral unit name,
    /// copies its id onto the asset and saves the asset.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// The asset is null or its candidate sequence number is blank.
    /// </exception>
    /// <exception cref="InvalidOperationException">No matching candidate was found.</exception>
    public async Task AddBemCandidatoAsync(BemCandidato bemCandidato)
    {
        if (bemCandidato is null || string.IsNullOrWhiteSpace(bemCandidato.SqCandidato))
        {
            throw new ArgumentNullException(nameof(bemCandidato), "BemCandidato cannot be null");
        }

        var mapping = await candidatoRepository.GetIdCandidatoBySqCandidato(
            bemCandidato.SqCandidato,
            bemCandidato.Ano,
            bemCandidato.SiglaUF,
            bemCandidato.NomeUE);

        // An empty id means the lookup row carried no usable candidate id.
        var found = mapping != null && mapping.IdCandidato != Guid.Empty;
        if (!found)
        {
            throw new InvalidOperationException($"AddBemCandidatoAsync - Candidato '{bemCandidato.SqCandidato}'/{bemCandidato.Ano}/'{bemCandidato.SiglaUF}'/'{bemCandidato.NomeUE}' not found.");
        }

        bemCandidato.IdCandidato = mapping!.IdCandidato;
        await bemCandidatoRepository.AddBemCandidatoAsync(bemCandidato);
    }
}
}

View File

@@ -0,0 +1,85 @@
using OpenCand.Core.Models;
using OpenCand.Repository;
namespace OpenCand.Services
{
/// <summary>
/// Stores candidates and their election mappings, reusing the id of a candidate that is
/// already known.
/// </summary>
public class CandidatoService
{
    private readonly CandidatoRepository candidatoRepository;

    /// <summary>Creates the service with the repository it reads from and writes to.</summary>
    public CandidatoService(CandidatoRepository candidatoRepository)
    {
        this.candidatoRepository = candidatoRepository;
    }

    /// <summary>
    /// Saves the candidate and, unless it is already recorded, the first mapping in its
    /// election list.
    /// </summary>
    /// <remarks>
    /// Known mappings are searched by CPF when the CPF has 11 characters, otherwise by name.
    /// If one of them describes the same election (year, office, state, electoral unit,
    /// candidate number and result), only the candidate row is refreshed. If the person is
    /// known from another election, the existing id and CPF are reused and a new mapping is
    /// added. Otherwise a new id is generated.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The candidate is null.</exception>
    /// <exception cref="ArgumentException">The candidate has no election mapping.</exception>
    public async Task AddCandidatoAsync(Candidato candidato)
    {
        if (candidato is null)
        {
            throw new ArgumentNullException(nameof(candidato), "Candidato cannot be null");
        }

        if (candidato.Eleicoes is not { Count: > 0 })
        {
            throw new ArgumentException("Candidato must have at least one mapping", nameof(candidato));
        }

        var currentElection = candidato.Eleicoes.First();

        // A CPF is only trusted as a key when it has the full 11 characters.
        var known = candidato.Cpf is { Length: 11 } cpf
            ? await candidatoRepository.GetCandidatoMappingByCpf(cpf)
            : await candidatoRepository.GetCandidatoMappingByNome(candidato.Nome);

        if (known is { Count: > 0 })
        {
            var sameElection = known.FirstOrDefault(m =>
                m.Ano == currentElection.Ano &&
                m.Cargo == currentElection.Cargo &&
                m.SiglaUF == currentElection.SiglaUF &&
                m.NomeUE == currentElection.NomeUE &&
                m.NrCandidato == currentElection.NrCandidato &&
                m.Resultado == currentElection.Resultado);

            // Either way the person is already known: reuse the stored identity.
            var identity = sameElection ?? known.First();
            candidato.IdCandidato = identity.IdCandidato;
            candidato.Cpf = identity.Cpf;

            if (sameElection != null)
            {
                // This election is already mapped; only refresh the candidate row.
                await candidatoRepository.AddCandidatoAsync(candidato);
                return;
            }
        }
        else
        {
            // First time this person is seen.
            candidato.IdCandidato = Guid.NewGuid();
        }

        currentElection.IdCandidato = candidato.IdCandidato;
        currentElection.Cpf = candidato.Cpf;
        currentElection.Nome = candidato.Nome;

        await candidatoRepository.AddCandidatoAsync(candidato);
        await candidatoRepository.AddCandidatoMappingAsync(currentElection);
    }
}
}

View File

@@ -0,0 +1,74 @@
using OpenCand.Core.Models;
using OpenCand.Repository;
namespace OpenCand.Services
{
public class RedeSocialService
{
private readonly CandidatoRepository candidatoRepository;
private readonly RedeSocialRepository redeSocialRepository;
/// <summary>Creates the service with the repositories it reads from and writes to.</summary>
public RedeSocialService(CandidatoRepository candidatoRepository, RedeSocialRepository redeSocialRepository)
{
    (this.candidatoRepository, this.redeSocialRepository) = (candidatoRepository, redeSocialRepository);
}
/// <summary>
/// Looks the candidate up by sequence number, year and state, copies its id onto the
/// link, classifies the link's network and saves it.
/// </summary>
/// <exception cref="ArgumentNullException">
/// The link is null or its candidate sequence number is blank.
/// </exception>
/// <exception cref="InvalidOperationException">No matching candidate was found.</exception>
public async Task AddRedeSocialAsync(RedeSocial redeSocial)
{
    if (redeSocial is null || string.IsNullOrWhiteSpace(redeSocial.SqCandidato))
    {
        throw new ArgumentNullException(nameof(redeSocial), "RedeSocial cannot be null");
    }

    var mapping = await candidatoRepository.GetIdCandidatoBySqCandidato(
        redeSocial.SqCandidato,
        redeSocial.Ano,
        redeSocial.SiglaUF);

    // An empty id means the lookup row carried no usable candidate id.
    var found = mapping != null && mapping.IdCandidato != Guid.Empty;
    if (!found)
    {
        throw new InvalidOperationException($"AddRedeSocialAsync - Candidato '{redeSocial.SqCandidato}'/{redeSocial.Ano}/'{redeSocial.SiglaUF}' not found.");
    }

    redeSocial.IdCandidato = mapping!.IdCandidato;
    redeSocial.Rede = GetRedeSocialType(redeSocial.Link);

    await redeSocialRepository.AddRedeSocialAsync(redeSocial);
}
// Known domains in priority order; the first one found in the link wins.
private static readonly (string Domain, string Rede)[] RedesConhecidas =
{
    ("facebook.com", "Facebook"),
    ("twitter.com", "X/Twitter"),
    ("x.com", "X/Twitter"),
    ("instagram.com", "Instagram"),
    ("youtube.com", "YouTube"),
    ("linkedin.com", "LinkedIn"),
    ("spotify.com", "Spotify"),
    ("kwai.com", "Kwai"),
    ("tiktok.com", "TikTok"),
    ("threads.com", "Threads"),
    ("threads.net", "Threads"),
    ("t.me", "Telegram"),
    ("telegram.com", "Telegram"),
    ("api.whatsapp", "WhatsApp"),
    ("whatsapp.com", "WhatsApp"),
    ("wa.me", "WhatsApp"),
};

/// <summary>
/// Classifies a link by the social network domain it contains.
/// </summary>
/// <param name="url">Link as found in the source data; may lack a scheme.</param>
/// <returns>
/// The display name of the first known network whose domain appears in the link, or
/// "Outros" when none does or the link is null/blank.
/// </returns>
/// <remarks>
/// A domain only counts when it appears as a whole host label sequence, so
/// "netflix.com" is not taken for "x.com" and "about.me" is not taken for "t.me".
/// Matching is ordinal and case-insensitive, independent of the current culture.
/// </remarks>
private static string GetRedeSocialType(string url)
{
    if (string.IsNullOrWhiteSpace(url))
    {
        return "Outros";
    }

    foreach (var (domain, rede) in RedesConhecidas)
    {
        if (ContainsDomain(url, domain))
        {
            return rede;
        }
    }

    return "Outros";
}

/// <summary>
/// Tells whether <paramref name="domain"/> occurs in <paramref name="text"/> without
/// being glued to other host-name characters on either side.
/// </summary>
private static bool ContainsDomain(string text, string domain)
{
    var searchFrom = 0;
    while (true)
    {
        var index = text.IndexOf(domain, searchFrom, StringComparison.OrdinalIgnoreCase);
        if (index < 0)
        {
            return false;
        }

        var end = index + domain.Length;
        // '.' before the match is fine (subdomain); a letter, digit or '-' means the
        // match is the tail of a longer, different host label.
        var cleanStart = index == 0 || !IsHostLabelChar(text[index - 1]);
        var cleanEnd = end == text.Length || !IsHostLabelChar(text[end]);
        if (cleanStart && cleanEnd)
        {
            return true;
        }

        searchFrom = index + 1;
    }
}

/// <summary>Characters that can be part of a single host-name label.</summary>
private static bool IsHostLabelChar(char c) => char.IsLetterOrDigit(c) || c == '-';
}
}

View File

@@ -0,0 +1,18 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
},
"DatabaseSettings": {
"ConnectionString": "Host=localhost;Database=opencand;Username=root;Password=root;Include Error Detail=true;CommandTimeout=300"
},
"CsvSettings": {
"CandidatosFolder": "data\\consulta_cand",
"BensCandidatosFolder": "data\\bem_candidato",
"RedesSociaisFolder": "data\\rede_social"
},
"BasePath": "sample"
}