package git import ( "bytes" "fmt" "os" "os/exec" "sort" "strings" "time" ) // Service defines the interface for git operations type Service interface { CheckConnection(url string) error FetchContents(url string) error // GetModifications returns the diffs of the top-N most-changed files (by lines // added/removed) modified within the last 'days' days. Files with 4 or fewer // changed lines are always excluded. Pass topN <= 0 to return all qualifying files. GetModifications(days int, topN int) (map[string]string, error) } type gitService struct { repoDir string } // NewGitService creates a new Git service func NewGitService() Service { return &gitService{ repoDir: "./cloned_repo", } } func prepareSSHKey() (string, bool) { b, err := os.ReadFile("/root/.ssh/id_rsa") if err != nil { return "", false } // Fix literal escaped newlines and CRLF issues that cause libcrypto errors content := strings.ReplaceAll(string(b), "\\n", "\n") content = strings.ReplaceAll(content, "\r", "") // Ensure there is a trailing newline if !strings.HasSuffix(content, "\n") { content += "\n" } tmpPath := "/tmp/id_rsa" if err := os.WriteFile(tmpPath, []byte(content), 0600); err != nil { return "", false } return tmpPath, true } func (s *gitService) CheckConnection(url string) error { cmd := exec.Command("git", "ls-remote", url) if keyPath, ok := prepareSSHKey(); ok { cmd.Env = append(os.Environ(), fmt.Sprintf("GIT_SSH_COMMAND=ssh -i %s -o StrictHostKeyChecking=no", keyPath)) } if err := cmd.Run(); err != nil { return fmt.Errorf("failed to check git connection: %w", err) } fmt.Println("Git connection checked successfully") return nil } func (s *gitService) FetchContents(url string) error { // Remove the repo directory if it already exists from a previous run fmt.Println("Removing repo directory") _ = os.RemoveAll(s.repoDir) fmt.Println("Cloning repository") var cmd *exec.Cmd if keyPath, ok := prepareSSHKey(); ok { cmd = exec.Command("git", "clone", url, s.repoDir) cmd.Env = append(os.Environ(), fmt.Sprintf("GIT_SSH_COMMAND=ssh -i %s -o StrictHostKeyChecking=no", keyPath)) } else { cmd = exec.Command("git", "clone", url, s.repoDir) } var stderr bytes.Buffer cmd.Stderr = &stderr if err := cmd.Run(); err != nil { return fmt.Errorf("failed to fetch contents: %w, stderr: %s", err, stderr.String()) } fmt.Println("Repository cloned successfully") return nil } func (s *gitService) GetModifications(days int, topN int) (map[string]string, error) { mods := make(map[string]string) // Determine the commit to diff against (the latest commit *before* 'days' ago) since := time.Now().AddDate(0, 0, -days).Format(time.RFC3339) cmdBase := exec.Command("git", "rev-list", "-1", "--before", since, "HEAD") cmdBase.Dir = s.repoDir out, err := cmdBase.Output() baseCommit := strings.TrimSpace(string(out)) if err != nil || baseCommit == "" { // If there is no commit before 'days' ago, diff against the empty tree // (this gets all files created in the repository's entire history if it's newer than 'days') baseCommit = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" } // Get the list of modified files between the base commit and HEAD cmdFiles := exec.Command("git", "-c", "core.quotePath=false", "diff", "--name-only", baseCommit, "HEAD") cmdFiles.Dir = s.repoDir filesOut, err := cmdFiles.Output() if err != nil { return nil, fmt.Errorf("failed to get modified files: %w", err) } files := strings.Split(strings.TrimSpace(string(filesOut)), "\n") for _, file := range files { fmt.Printf("Processing file: %s\n", file) if file == "" { continue } // Filter only .md files if !strings.HasSuffix(file, ".md") { continue } // Skip files with "Conteúdos.md" in the path if strings.Contains(file, "Conteúdos.md") { continue } originalFile := file // Remove first folder from file path file = strings.Join(strings.Split(file, "/")[1:], "/") // Note: 'git diff' compares the beginning and end trees, so it has no native concept // of ignoring intermediate commits. To skip the changes made in "refactor" commits // without using loops, we use `git log -p` combined with `--invert-grep` to // natively output the diffs of only the non-refactor commits for this file. rangeStr := "HEAD" if baseCommit != "4b825dc642cb6eb9a060e54bf8d69288fbee4904" { rangeStr = baseCommit + "..HEAD" } cmdDiff := exec.Command("git", "-c", "core.quotePath=false", "log", "-p", "-i", "--invert-grep", "--grep=refactor", rangeStr, "--", originalFile) cmdDiff.Dir = s.repoDir diffOut, err := cmdDiff.Output() if err != nil { return nil, fmt.Errorf("failed to get diff for file %s: %w", originalFile, err) } if len(diffOut) > 0 { mods[file] = string(diffOut) } } // Count the number of changed lines (additions + deletions) per file. // Lines starting with '+' or '-' are changed lines; lines starting with '+++' // or '---' are the diff file headers and must be excluded. type fileScore struct { name string score int } scores := make([]fileScore, 0, len(mods)) for name, diff := range mods { count := 0 for _, line := range strings.Split(diff, "\n") { if (strings.HasPrefix(line, "+") || strings.HasPrefix(line, "-")) && !strings.HasPrefix(line, "++") && !strings.HasPrefix(line, "--") { count++ } } // Ignore files with 4 or fewer lines changed if count <= 4 { fmt.Printf("Ignoring file %s: %d lines changed\n", name, count) continue } scores = append(scores, fileScore{name: name, score: count}) } // Sort descending by number of changed lines sort.Slice(scores, func(i, j int) bool { return scores[i].score > scores[j].score }) // Keep only the top-N entries (if topN <= 0, keep all qualifying files) if topN > 0 && len(scores) > topN { scores = scores[:topN] } result := make(map[string]string, len(scores)) for _, fs := range scores { result[fs.name] = mods[fs.name] } return result, nil }