From afff091457cea1bf5e1fd36706a4aeb91c9402a3 Mon Sep 17 00:00:00 2001 From: Jose Henrique Date: Fri, 20 Mar 2026 21:25:19 -0300 Subject: [PATCH] adding top_n_files --- mindforge.cronjob/.env.example | 4 +- .../cmd/mindforge.cronjob/main.go | 21 ++++++- .../deploy/mindforge-cronjob.yaml | 4 ++ mindforge.cronjob/internal/git/git.go | 60 +++++++++++++++++-- 4 files changed, 81 insertions(+), 8 deletions(-) diff --git a/mindforge.cronjob/.env.example b/mindforge.cronjob/.env.example index b126bf8..ce61d0f 100644 --- a/mindforge.cronjob/.env.example +++ b/mindforge.cronjob/.env.example @@ -9,4 +9,6 @@ SUMMARY_FORMATTER_PROVIDER=openai # LLM models GEMINI_MODEL=gemini-3-flash-preview -OPENAI_MODEL=gpt-5-mini \ No newline at end of file +OPENAI_MODEL=gpt-5-mini + +TOP_N_FILES=10 \ No newline at end of file diff --git a/mindforge.cronjob/cmd/mindforge.cronjob/main.go b/mindforge.cronjob/cmd/mindforge.cronjob/main.go index e6773e1..0af1192 100644 --- a/mindforge.cronjob/cmd/mindforge.cronjob/main.go +++ b/mindforge.cronjob/cmd/mindforge.cronjob/main.go @@ -4,6 +4,7 @@ import ( "fmt" "log" "os" + "strconv" "github.com/joho/godotenv" "mindforge.cronjob/internal/agent" @@ -27,6 +28,14 @@ func main() { // Initialize services gitService := git.NewGitService() + // Resolve how many top files to return (TOP_N_FILES env var, default 10) + topN := 10 + if v := os.Getenv("TOP_N_FILES"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + topN = n + } + } + // Get modifications var modifications map[string]string error := gitService.FetchContents(gitRepo) @@ -34,7 +43,15 @@ func main() { log.Println("ERROR: Failed to fetch contents:", error) } - modifications, error = gitService.GetModifications(7) + // Resolve how many days to look back (LAST_N_DAYS env var, default 7) + days := 7 + if v := os.Getenv("LAST_N_DAYS"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + days = n + } + } + + modifications, error = gitService.GetModifications(days, topN) if error != nil { log.Println("ERROR: Failed to get modifications:", error) } @@ -42,7 +59,7 @@ func main() { fmt.Printf("Found %d modifications\n", len(modifications)) for file, content := range modifications { - fmt.Printf("File: %s\n", file) + fmt.Printf("Processing file: %s\n", file) raw_summary, err := agent.SummaryCreatorAgent(file, content) if err != nil { diff --git a/mindforge.cronjob/deploy/mindforge-cronjob.yaml b/mindforge.cronjob/deploy/mindforge-cronjob.yaml index ddf7b4e..ab4a6f1 100644 --- a/mindforge.cronjob/deploy/mindforge-cronjob.yaml +++ b/mindforge.cronjob/deploy/mindforge-cronjob.yaml @@ -50,6 +50,10 @@ spec: value: gemini-3-flash-preview - name: OPENAI_MODEL value: gpt-5-mini + - name: TOP_N_FILES + value: "10" + - name: LAST_N_DAYS + value: "7" resources: requests: memory: "256Mi" diff --git a/mindforge.cronjob/internal/git/git.go b/mindforge.cronjob/internal/git/git.go index 5c14344..8a63863 100644 --- a/mindforge.cronjob/internal/git/git.go +++ b/mindforge.cronjob/internal/git/git.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "os/exec" + "sort" "strings" "time" ) @@ -13,7 +14,10 @@ import ( type Service interface { CheckConnection(url string) error FetchContents(url string) error - GetModifications(days int) (map[string]string, error) + // GetModifications returns the diffs of the top-N most-changed files (by lines + // added/removed) modified within the last 'days' days. Files with 4 or fewer + // changed lines are always excluded. Pass topN <= 0 to return all qualifying files. + GetModifications(days int, topN int) (map[string]string, error) } type gitService struct { @@ -85,7 +89,7 @@ func (s *gitService) FetchContents(url string) error { return nil } -func (s *gitService) GetModifications(days int) (map[string]string, error) { +func (s *gitService) GetModifications(days int, topN int) (map[string]string, error) { mods := make(map[string]string) // Determine the commit to diff against (the latest commit *before* 'days' ago) @@ -103,7 +107,7 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) { } // Get the list of modified files between the base commit and HEAD - cmdFiles := exec.Command("git", "diff", "--name-only", baseCommit, "HEAD") + cmdFiles := exec.Command("git", "-c", "core.quotePath=false", "diff", "--name-only", baseCommit, "HEAD") cmdFiles.Dir = s.repoDir filesOut, err := cmdFiles.Output() if err != nil { @@ -112,6 +116,8 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) { files := strings.Split(strings.TrimSpace(string(filesOut)), "\n") for _, file := range files { + fmt.Printf("Processing file: %s\n", file) + if file == "" { continue } @@ -121,6 +127,11 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) { continue } + // Skip files with "Conteúdos.md" in the path + if strings.Contains(file, "Conteúdos.md") { + continue + } + originalFile := file // Remove first folder from file path @@ -135,7 +146,7 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) { rangeStr = baseCommit + "..HEAD" } - cmdDiff := exec.Command("git", "log", "-p", "-i", "--invert-grep", "--grep=refactor", rangeStr, "--", originalFile) + cmdDiff := exec.Command("git", "-c", "core.quotePath=false", "log", "-p", "-i", "--invert-grep", "--grep=refactor", rangeStr, "--", originalFile) cmdDiff.Dir = s.repoDir diffOut, err := cmdDiff.Output() if err != nil { @@ -147,5 +158,44 @@ func (s *gitService) GetModifications(days int) (map[string]string, error) { } } - return mods, nil + // Count the number of changed lines (additions + deletions) per file. + // Lines starting with '+' or '-' are changed lines; lines starting with '+++' + // or '---' are the diff file headers and must be excluded. + type fileScore struct { + name string + score int + } + scores := make([]fileScore, 0, len(mods)) + for name, diff := range mods { + count := 0 + for _, line := range strings.Split(diff, "\n") { + if (strings.HasPrefix(line, "+") || strings.HasPrefix(line, "-")) && + !strings.HasPrefix(line, "++") && !strings.HasPrefix(line, "--") { + count++ + } + } + // Ignore files with 4 or fewer lines changed + if count <= 4 { + fmt.Printf("Ignoring file %s: %d lines changed\n", name, count) + continue + } + scores = append(scores, fileScore{name: name, score: count}) + } + + // Sort descending by number of changed lines + sort.Slice(scores, func(i, j int) bool { + return scores[i].score > scores[j].score + }) + + // Keep only the top-N entries (if topN <= 0, keep all qualifying files) + if topN > 0 && len(scores) > topN { + scores = scores[:topN] + } + + result := make(map[string]string, len(scores)) + for _, fs := range scores { + result[fs.name] = mods[fs.name] + } + + return result, nil }