adding top_n_files
All checks were successful
Mindforge Cronjob Build and Deploy / Build Mindforge Cronjob Image (push) Successful in 1m19s
Mindforge Cronjob Build and Deploy / Deploy Mindforge Cronjob (internal) (push) Successful in 43s

This commit is contained in:
2026-03-20 21:25:19 -03:00
parent 510abaa358
commit afff091457
4 changed files with 81 additions and 8 deletions

View File

@@ -9,4 +9,6 @@ SUMMARY_FORMATTER_PROVIDER=openai
# LLM models
GEMINI_MODEL=gemini-3-flash-preview
OPENAI_MODEL=gpt-5-mini
OPENAI_MODEL=gpt-5-mini
TOP_N_FILES=10

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"log"
"os"
"strconv"
"github.com/joho/godotenv"
"mindforge.cronjob/internal/agent"
@@ -27,6 +28,14 @@ func main() {
// Initialize services
gitService := git.NewGitService()
// Resolve how many top files to return (TOP_N_FILES env var, default 10)
topN := 10
if v := os.Getenv("TOP_N_FILES"); v != "" {
if n, err := strconv.Atoi(v); err == nil && n > 0 {
topN = n
}
}
// Get modifications
var modifications map[string]string
error := gitService.FetchContents(gitRepo)
@@ -34,7 +43,15 @@ func main() {
log.Println("ERROR: Failed to fetch contents:", error)
}
modifications, error = gitService.GetModifications(7)
// Resolve how many days to look back (LAST_N_DAYS env var, default 7)
days := 7
if v := os.Getenv("LAST_N_DAYS"); v != "" {
if n, err := strconv.Atoi(v); err == nil && n > 0 {
days = n
}
}
modifications, error = gitService.GetModifications(days, topN)
if error != nil {
log.Println("ERROR: Failed to get modifications:", error)
}
@@ -42,7 +59,7 @@ func main() {
fmt.Printf("Found %d modifications\n", len(modifications))
for file, content := range modifications {
fmt.Printf("File: %s\n", file)
fmt.Printf("Processing file: %s\n", file)
raw_summary, err := agent.SummaryCreatorAgent(file, content)
if err != nil {

View File

@@ -50,6 +50,10 @@ spec:
value: gemini-3-flash-preview
- name: OPENAI_MODEL
value: gpt-5-mini
- name: TOP_N_FILES
value: "10"
- name: LAST_N_DAYS
value: "7"
resources:
requests:
memory: "256Mi"

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"os/exec"
"sort"
"strings"
"time"
)
@@ -13,7 +14,10 @@ import (
type Service interface {
CheckConnection(url string) error
FetchContents(url string) error
GetModifications(days int) (map[string]string, error)
// GetModifications returns the diffs of the top-N most-changed files (by lines
// added/removed) modified within the last 'days' days. Files with 4 or fewer
// changed lines are always excluded. Pass topN <= 0 to return all qualifying files.
GetModifications(days int, topN int) (map[string]string, error)
}
type gitService struct {
@@ -85,7 +89,7 @@ func (s *gitService) FetchContents(url string) error {
return nil
}
func (s *gitService) GetModifications(days int) (map[string]string, error) {
func (s *gitService) GetModifications(days int, topN int) (map[string]string, error) {
mods := make(map[string]string)
// Determine the commit to diff against (the latest commit *before* 'days' ago)
@@ -103,7 +107,7 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) {
}
// Get the list of modified files between the base commit and HEAD
cmdFiles := exec.Command("git", "diff", "--name-only", baseCommit, "HEAD")
cmdFiles := exec.Command("git", "-c", "core.quotePath=false", "diff", "--name-only", baseCommit, "HEAD")
cmdFiles.Dir = s.repoDir
filesOut, err := cmdFiles.Output()
if err != nil {
@@ -112,6 +116,8 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) {
files := strings.Split(strings.TrimSpace(string(filesOut)), "\n")
for _, file := range files {
fmt.Printf("Processing file: %s\n", file)
if file == "" {
continue
}
@@ -121,6 +127,11 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) {
continue
}
// Skip files with "Conteúdos.md" in the path
if strings.Contains(file, "Conteúdos.md") {
continue
}
originalFile := file
// Remove first folder from file path
@@ -135,7 +146,7 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) {
rangeStr = baseCommit + "..HEAD"
}
cmdDiff := exec.Command("git", "log", "-p", "-i", "--invert-grep", "--grep=refactor", rangeStr, "--", originalFile)
cmdDiff := exec.Command("git", "-c", "core.quotePath=false", "log", "-p", "-i", "--invert-grep", "--grep=refactor", rangeStr, "--", originalFile)
cmdDiff.Dir = s.repoDir
diffOut, err := cmdDiff.Output()
if err != nil {
@@ -147,5 +158,44 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) {
}
}
return mods, nil
// Rank the collected diffs by the number of changed lines (additions +
// deletions) and keep only the most-changed files.
//
// In patch text, changed lines start with '+' or '-'. The per-file headers
// ("--- a/<path>", "+++ b/<path>", or "/dev/null" in place of the path) also
// start with those characters and must not be counted. Headers are always
// followed by a space and a path, so only the exact "+++ " / "--- " prefixes
// are skipped. Skipping every line that starts with "++" or "--" would also
// discard real changes whose content begins with '+' or '-', e.g. a removed
// Markdown bullet "- item", which appears in the patch as "-- item".
// NOTE(review): a removed line whose content starts with "-- " (or an added
// one starting with "++ ") is still indistinguishable from a header here;
// confirm whether that matters for the tracked repository.
type fileScore struct {
	name  string
	score int
}
scores := make([]fileScore, 0, len(mods))
for name, diff := range mods {
	count := 0
	for _, line := range strings.Split(diff, "\n") {
		if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") {
			// File header of a patch, not a content change.
			continue
		}
		if strings.HasPrefix(line, "+") || strings.HasPrefix(line, "-") {
			count++
		}
	}
	// Ignore files with 4 or fewer lines changed
	if count <= 4 {
		fmt.Printf("Ignoring file %s: %d lines changed\n", name, count)
		continue
	}
	scores = append(scores, fileScore{name: name, score: count})
}

// Sort descending by number of changed lines. Ties are broken by file name:
// scores is filled from a map (random iteration order) and sort.Slice is not
// stable, so without a tie-break the files surviving the top-N cut could
// differ from run to run.
sort.Slice(scores, func(i, j int) bool {
	if scores[i].score != scores[j].score {
		return scores[i].score > scores[j].score
	}
	return scores[i].name < scores[j].name
})

// Keep only the top-N entries (if topN <= 0, keep all qualifying files)
if topN > 0 && len(scores) > topN {
	scores = scores[:topN]
}

// Rebuild the result map so it contains only the selected files.
result := make(map[string]string, len(scores))
for _, fs := range scores {
	result[fs.name] = mods[fs.name]
}
return result, nil
}