Files
mindforge/mindforge.cronjob/internal/git/git.go
Jose Henrique afff091457
All checks were successful
Mindforge Cronjob Build and Deploy / Build Mindforge Cronjob Image (push) Successful in 1m19s
Mindforge Cronjob Build and Deploy / Deploy Mindforge Cronjob (internal) (push) Successful in 43s
adding top_n_files
2026-03-20 21:25:19 -03:00

202 lines
5.9 KiB
Go

package git
import (
"bytes"
"fmt"
"os"
"os/exec"
"sort"
"strings"
"time"
)
// Service describes the git operations exposed by this package.
type Service interface {
	// CheckConnection reports, through its error result, whether the remote
	// repository at url can be reached.
	CheckConnection(url string) error

	// FetchContents retrieves the contents of the repository at url.
	FetchContents(url string) error

	// GetModifications returns the diffs of the top-N most-changed files,
	// ranked by lines added plus lines removed, among the files modified within
	// the last 'days' days. Files with 4 or fewer changed lines are always
	// excluded. A topN <= 0 disables the limit and returns every qualifying
	// file.
	GetModifications(days, topN int) (map[string]string, error)
}
// gitService is the Service implementation defined in this file; its methods
// drive the git command-line tool through os/exec.
type gitService struct {
// repoDir is the directory the repository is cloned into by FetchContents and
// the working directory used for the git commands run by GetModifications.
repoDir string
}
// NewGitService returns a Service backed by the git command-line tool. The
// returned service uses "./cloned_repo" as its clone directory.
func NewGitService() Service {
	svc := new(gitService)
	svc.repoDir = "./cloned_repo"
	return svc
}
// prepareSSHKey reads the private key stored at /root/.ssh/id_rsa, normalizes
// its line endings and writes the result to /tmp/id_rsa so that it can be
// passed to ssh through GIT_SSH_COMMAND.
//
// It returns the path of the normalized copy and true on success. When the
// source key cannot be read, or the copy cannot be written, it returns "" and
// false so callers can fall back to running git without an explicit key.
func prepareSSHKey() (string, bool) {
	const (
		srcPath = "/root/.ssh/id_rsa"
		tmpPath = "/tmp/id_rsa"
	)

	raw, err := os.ReadFile(srcPath)
	if err != nil {
		return "", false
	}

	// Fix literal escaped newlines and CRLF issues that cause libcrypto errors.
	content := strings.ReplaceAll(string(raw), "\\n", "\n")
	content = strings.ReplaceAll(content, "\r", "")
	// Ensure there is a trailing newline.
	if !strings.HasSuffix(content, "\n") {
		content += "\n"
	}

	// Remove any leftover from a previous run and create the file exclusively.
	// Writing over an existing file would keep that file's old permission bits
	// (ssh rejects keys that are group/world readable) and would follow a
	// symlink planted at this predictable path.
	if err := os.Remove(tmpPath); err != nil && !os.IsNotExist(err) {
		return "", false
	}
	f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600)
	if err != nil {
		return "", false
	}
	if _, err := f.WriteString(content); err != nil {
		// Do not leave a truncated private key behind.
		f.Close()
		os.Remove(tmpPath)
		return "", false
	}
	if err := f.Close(); err != nil {
		os.Remove(tmpPath)
		return "", false
	}
	return tmpPath, true
}
// CheckConnection verifies that the remote repository at url is reachable by
// running `git ls-remote` against it.
//
// When prepareSSHKey yields a key, git is pointed at it through
// GIT_SSH_COMMAND; otherwise git runs with the inherited environment. A nil
// result means the command exited successfully. On failure the returned error
// wraps the execution error and includes whatever git wrote to stderr.
//
// NOTE(review): StrictHostKeyChecking=no disables ssh host key verification;
// confirm this is acceptable for the deployment.
func (s *gitService) CheckConnection(url string) error {
	cmd := exec.Command("git", "ls-remote", url)
	if keyPath, ok := prepareSSHKey(); ok {
		cmd.Env = append(os.Environ(), fmt.Sprintf("GIT_SSH_COMMAND=ssh -i %s -o StrictHostKeyChecking=no", keyPath))
	}

	// Capture stderr so a failure carries git's own diagnostic instead of only
	// "exit status N", matching how FetchContents reports clone errors.
	var stderr bytes.Buffer
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return fmt.Errorf("failed to check git connection: %w, stderr: %s", err, stderr.String())
	}
	fmt.Println("Git connection checked successfully")
	return nil
}
// FetchContents clones the repository at url into s.repoDir, first deleting
// whatever a previous run left in that directory.
//
// When prepareSSHKey yields a key, git is pointed at it through
// GIT_SSH_COMMAND; otherwise git runs with the inherited environment. On
// failure the returned error wraps the execution error and includes git's
// stderr output.
func (s *gitService) FetchContents(url string) error {
	// Remove the repo directory if it already exists from a previous run.
	// Removal stays best-effort (the clone below decides whether the run can
	// proceed), but a failure is reported instead of being silently dropped so
	// a later clone error can be traced back to it.
	fmt.Println("Removing repo directory")
	if err := os.RemoveAll(s.repoDir); err != nil {
		fmt.Printf("Warning: could not remove repo directory %s: %v\n", s.repoDir, err)
	}

	fmt.Println("Cloning repository")
	cmd := exec.Command("git", "clone", url, s.repoDir)
	if keyPath, ok := prepareSSHKey(); ok {
		cmd.Env = append(os.Environ(), fmt.Sprintf("GIT_SSH_COMMAND=ssh -i %s -o StrictHostKeyChecking=no", keyPath))
	}

	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("failed to fetch contents: %w, stderr: %s", err, stderr.String())
	}
	fmt.Println("Repository cloned successfully")
	return nil
}
// GetModifications returns the patch history of the Markdown files that
// changed in the repository at s.repoDir during the last 'days' days.
//
// The base of the comparison is the latest commit made before 'days' days ago;
// if no such commit can be determined, git's empty tree is used so that every
// file in history qualifies. Only paths ending in ".md" are considered, and
// paths containing "Conteúdos.md" are skipped. For each remaining file the
// value stored is the output of `git log -p` restricted to that file, with
// commits whose message matches "refactor" (case-insensitively) left out.
//
// Map keys are the repository-relative path with its first directory removed;
// a file located at the repository root keeps its name as the key. Files with
// 4 or fewer changed lines are dropped. When topN > 0 only the topN files with
// the most changed lines are returned (ties are broken by key so the selection
// is reproducible); topN <= 0 returns every qualifying file.
//
// NOTE(review): two files that differ only in their first directory map to the
// same key, and the later one overwrites the earlier — confirm this cannot
// happen in the target repository.
func (s *gitService) GetModifications(days int, topN int) (map[string]string, error) {
	// Hash of git's empty tree object, used as the diff base when the
	// repository has no commit older than the requested window.
	const emptyTree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

	// Determine the commit to diff against (the latest commit *before* 'days' ago).
	since := time.Now().AddDate(0, 0, -days).Format(time.RFC3339)
	baseCommit := emptyTree
	if out, err := gitOutput(s.repoDir, "rev-list", "-1", "--before", since, "HEAD"); err == nil {
		if commit := strings.TrimSpace(string(out)); commit != "" {
			baseCommit = commit
		}
	}

	// Get the list of modified files between the base commit and HEAD.
	filesOut, err := gitOutput(s.repoDir, "-c", "core.quotePath=false", "diff", "--name-only", baseCommit, "HEAD")
	if err != nil {
		return nil, fmt.Errorf("failed to get modified files: %w", err)
	}

	// 'git diff' compares two trees and cannot skip intermediate commits, so
	// the per-file patches come from `git log -p` with --invert-grep, which
	// outputs only the diffs of the non-refactor commits. The revision range is
	// the same for every file, so it is computed once.
	rangeStr := "HEAD"
	if baseCommit != emptyTree {
		rangeStr = baseCommit + "..HEAD"
	}

	mods := make(map[string]string)
	for _, originalFile := range strings.Split(strings.TrimSpace(string(filesOut)), "\n") {
		fmt.Printf("Processing file: %s\n", originalFile)
		if originalFile == "" {
			continue
		}
		// Filter only .md files.
		if !strings.HasSuffix(originalFile, ".md") {
			continue
		}
		// Skip files with "Conteúdos.md" in the path.
		if strings.Contains(originalFile, "Conteúdos.md") {
			continue
		}

		// Remove the first folder from the path. A root-level file has no
		// folder to strip; it keeps its name instead of collapsing to "".
		key := originalFile
		if i := strings.Index(originalFile, "/"); i >= 0 {
			key = originalFile[i+1:]
		}

		diffOut, err := gitOutput(s.repoDir, "-c", "core.quotePath=false", "log", "-p", "-i", "--invert-grep", "--grep=refactor", rangeStr, "--", originalFile)
		if err != nil {
			return nil, fmt.Errorf("failed to get diff for file %s: %w", originalFile, err)
		}
		if len(diffOut) > 0 {
			mods[key] = string(diffOut)
		}
	}

	// Score every file by its number of changed lines (additions + deletions).
	type fileScore struct {
		name  string
		score int
	}
	scores := make([]fileScore, 0, len(mods))
	for name, diff := range mods {
		count := countChangedLines(diff)
		// Ignore files with 4 or fewer lines changed.
		if count <= 4 {
			fmt.Printf("Ignoring file %s: %d lines changed\n", name, count)
			continue
		}
		scores = append(scores, fileScore{name: name, score: count})
	}

	// Sort descending by number of changed lines. Equal scores fall back to the
	// name so the top-N cut does not depend on map iteration order.
	sort.Slice(scores, func(i, j int) bool {
		if scores[i].score != scores[j].score {
			return scores[i].score > scores[j].score
		}
		return scores[i].name < scores[j].name
	})

	// Keep only the top-N entries (if topN <= 0, keep all qualifying files).
	if topN > 0 && len(scores) > topN {
		scores = scores[:topN]
	}

	result := make(map[string]string, len(scores))
	for _, fs := range scores {
		result[fs.name] = mods[fs.name]
	}
	return result, nil
}

// gitOutput runs git with args inside dir and returns its standard output. On
// failure the error wraps the execution error and carries git's stderr text.
func gitOutput(dir string, args ...string) ([]byte, error) {
	cmd := exec.Command("git", args...)
	cmd.Dir = dir
	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	out, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("%w, stderr: %s", err, strings.TrimSpace(stderr.String()))
	}
	return out, nil
}

// countChangedLines counts the added and removed lines in `git log -p` output.
// Only lines inside a hunk (after an "@@" header) are counted, so the
// "--- a/file" / "+++ b/file" headers are excluded without also discarding
// content lines that begin with '-' or '+' themselves, such as a removed
// Markdown bullet, which appears in a patch as "-- item".
func countChangedLines(diff string) int {
	count := 0
	inHunk := false
	for _, line := range strings.Split(diff, "\n") {
		switch {
		case strings.HasPrefix(line, "diff "), strings.HasPrefix(line, "commit "):
			// A new file or commit header ends the current hunk. Hunk lines
			// always start with ' ', '+', '-' or '\', so this cannot misfire on
			// file content.
			inHunk = false
		case strings.HasPrefix(line, "@@"):
			inHunk = true
		case inHunk && (strings.HasPrefix(line, "+") || strings.HasPrefix(line, "-")):
			count++
		}
	}
	return count
}