first-commit

This commit is contained in:
2025-08-25 15:46:12 +08:00
commit f4d95dfff4
5665 changed files with 705359 additions and 0 deletions

469
services/gitdiff/csv.go Normal file
View File

@@ -0,0 +1,469 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"encoding/csv"
"errors"
"io"
)
// Tuning constants used when matching CSV columns between the base and head file.
const (
// unmappedColumn marks a column that has no counterpart in the other file.
unmappedColumn = -1
// maxRowsToInspect is the number of leading rows buffered per file and the
// upper bound on the rows compared when matching columns by content.
maxRowsToInspect int = 10
// minRatioToMatch is the share of compared rows that must be exceeded
// (strictly greater than) for two columns to be matched by content.
minRatioToMatch float32 = 0.8
)
// TableDiffCellType represents the type of a TableDiffCell.
type TableDiffCellType uint8
// TableDiffCellType possible values. Numbering starts at 1, so the zero value
// of TableDiffCellType is none of the named types.
const (
// TableDiffCellUnchanged: both sides hold the same value and the column did not move.
TableDiffCellUnchanged TableDiffCellType = iota + 1
// TableDiffCellChanged: the values differ and the column did not move.
TableDiffCellChanged
// TableDiffCellAdd: the cell only exists on the head (right) side.
TableDiffCellAdd
// TableDiffCellDel: the cell only exists on the base (left) side.
TableDiffCellDel
// TableDiffCellMovedUnchanged: same value on both sides, but the column position changed.
TableDiffCellMovedUnchanged
// TableDiffCellMovedChanged: the values differ and the column position changed.
TableDiffCellMovedChanged
)
// TableDiffCell represents a cell of a TableDiffRow
type TableDiffCell struct {
// LeftCell is the value taken from the base file; createCsvDiff clears it
// again for cells whose value is identical on both sides.
LeftCell string
// RightCell is the value taken from the head file.
RightCell string
// Type classifies the cell (added, deleted, changed, moved, ...).
Type TableDiffCellType
}
// TableDiffRow represents a row of a TableDiffSection.
type TableDiffRow struct {
// RowIdx is the 1-based row number. For two-sided diffs it is the line number
// in the head file (0 when the row does not exist there); for one-sided diffs
// it is a running counter over the rows read.
RowIdx int
// Cells holds one entry per column of the diff table.
Cells []*TableDiffCell
}
// TableDiffSection represents a section of a DiffFile.
type TableDiffSection struct {
// Rows are the diff table rows belonging to this section, in output order.
Rows []*TableDiffRow
}
// csvReader wraps a csv.Reader which buffers the first rows.
type csvReader struct {
// reader is the underlying CSV source.
reader *csv.Reader
// buffer holds the pre-read leading rows; entries beyond the end of the input stay nil.
buffer [][]string
// line counts the rows accounted for so far: it starts at the buffer size
// and is incremented for every further row read by GetRow.
line int
// eof is set once the underlying reader has reported io.EOF.
eof bool
}
// ErrorUndefinedCell is returned by getCell when the requested column does not
// exist in the given CSV row.
var ErrorUndefinedCell = errors.New("undefined cell")
// createCsvReader wraps reader in a csvReader and pre-reads up to bufferRowCount
// rows into its buffer. The buffer always has bufferRowCount entries; entries
// past the end of the input remain nil. A read error aborts the construction.
func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
	buffered := &csvReader{
		reader: reader,
		buffer: make([][]string, bufferRowCount),
	}
	for idx := range buffered.buffer {
		if buffered.eof {
			break
		}
		record, err := buffered.readNextRow()
		if err != nil {
			return nil, err
		}
		buffered.buffer[idx] = record
	}
	// The line counter starts after the buffered window, whether or not the
	// input actually contained that many rows.
	buffered.line = bufferRowCount
	return buffered, nil
}
// GetRow returns the row with the given zero-based index. Rows inside the
// buffered window are served from memory; for any other index the underlying
// reader is advanced until that row has been read. When the end of the input
// is reached first, nil is returned without an error.
func (r *csvReader) GetRow(row int) ([]string, error) {
	if row >= 0 && row < len(r.buffer) {
		return r.buffer[row], nil
	}
	for !r.eof {
		fields, err := r.readNextRow()
		if err != nil {
			return nil, err
		}
		if r.eof {
			break
		}
		// current is the zero-based index of the row that was just read.
		current := r.line
		r.line++
		if current == row {
			return fields, nil
		}
	}
	return nil, nil
}
// readNextRow reads one record from the underlying reader. Reaching io.EOF is
// not reported as an error: it sets the eof flag instead, and every later call
// returns nil immediately. Any other read error is returned unchanged.
func (r *csvReader) readNextRow() ([]string, error) {
	if r.eof {
		return nil, nil
	}
	record, err := r.reader.Read()
	switch {
	case err == nil:
		return record, nil
	case err == io.EOF:
		r.eof = true
		return record, nil
	default:
		return nil, err
	}
}
// CreateCsvDiff creates a tabular diff based on two CSV readers.
//
// A nil baseReader means the file has no base version, so every cell is
// reported as added; a nil headReader means it has no head version, so every
// cell is reported as deleted. When both readers are nil there is nothing to
// diff and a nil result is returned without an error (previously this case
// dereferenced a nil reader and panicked).
func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
	switch {
	case baseReader != nil && headReader != nil:
		return createCsvDiff(diffFile, baseReader, headReader)
	case baseReader != nil:
		return createCsvDiffSingle(baseReader, TableDiffCellDel)
	case headReader != nil:
		return createCsvDiffSingle(headReader, TableDiffCellAdd)
	default:
		return nil, nil
	}
}
// createCsvDiffSingle builds a tabular diff from a single CSV reader, marking
// every cell with celltype. Values go into LeftCell when celltype is
// TableDiffCellDel and into RightCell otherwise. Rows are numbered from 1 and
// the result always consists of exactly one section.
func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
	var rows []*TableDiffRow
	for rowIdx := 1; ; rowIdx++ {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		cells := make([]*TableDiffCell, 0, len(record))
		for _, value := range record {
			cell := &TableDiffCell{Type: celltype}
			if celltype == TableDiffCellDel {
				cell.LeftCell = value
			} else {
				cell.RightCell = value
			}
			cells = append(cells, cell)
		}
		rows = append(rows, &TableDiffRow{RowIdx: rowIdx, Cells: cells})
	}
	return []*TableDiffSection{{Rows: rows}}, nil
}
// createCsvDiff builds the tabular diff for a file that has both a base and a head version.
// It buffers the first maxRowsToInspect rows of each reader, derives a column mapping between
// the two files, and then produces one TableDiffRow per merged diff line of every section in
// diffFile. Sections that yield no rows are left out of the result.
func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
// Given the baseReader and headReader, we are going to create a CSV reader for each: baseCSVReader and headCSVReader respectively
baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect)
if err != nil {
return nil, err
}
headCSVReader, err := createCsvReader(headReader, maxRowsToInspect)
if err != nil {
return nil, err
}
// Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader)
// Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap)
if len(a2bColMap) < len(b2aColMap) {
numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap)
}
// createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
// in the base or head respectively.
// Returns a TableDiffRow whose RowIdx is the head line number (bLineNum), or nil if neither line exists.
createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) {
// diffTableCells is a row of the diff table. It will have cells for added, deleted, changed, and unchanged content, thus either
// the same size as the head table or bigger
diffTableCells := make([]*TableDiffCell, numDiffTableCols)
// bRow / aRow stay nil when the corresponding line number is 0 (the line does not exist on that side)
var bRow *[]string
if bLineNum > 0 {
row, err := headCSVReader.GetRow(bLineNum - 1)
if err != nil {
return nil, err
}
bRow = &row
}
var aRow *[]string
if aLineNum > 0 {
row, err := baseCSVReader.GetRow(aLineNum - 1)
if err != nil {
return nil, err
}
aRow = &row
}
if aRow == nil && bRow == nil {
// No content
return nil, nil
}
aIndex := 0 // tracks where we are in the a2bColMap
bIndex := 0 // tracks where we are in the b2aColMap
colsAdded := 0 // incremented whenever we found a column was added
colsDeleted := 0 // incremented whenever a column was deleted
// We loop until both the aIndex and bIndex are greater than their col map, which then we are done
for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) {
// Starting from where aIndex is currently pointing, we see if the map is -1 (deleted) and if is, create column to note that, increment, and look at the next aIndex
for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) {
var aCell string
if aRow != nil {
// A missing cell (ErrorUndefinedCell) is tolerated and leaves aCell empty; any other error aborts
if cell, err := getCell(*aRow, aIndex); err != nil {
if err != ErrorUndefinedCell {
return nil, err
}
} else {
aCell = cell
}
}
diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel}
aIndex++
colsDeleted++
}
// aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
// we can just increment aIndex until it points to a -1 column or the end of a2bColMap
for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 {
aIndex++
}
// Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next bIndex
for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
var bCell string
cellType := TableDiffCellAdd
if bRow != nil {
if cell, err := getCell(*bRow, bIndex); err != nil {
if err != ErrorUndefinedCell {
return nil, err
}
} else {
bCell = cell
}
} else {
// The row does not exist in head, so the cell of this added column is shown as deleted
cellType = TableDiffCellDel
}
diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType}
bIndex++
colsAdded++
}
// bIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
// we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
var diffTableCell TableDiffCell
var aCell *string
// get the aCell value if the aRow exists
if aRow != nil {
if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil {
if err != ErrorUndefinedCell {
return nil, err
}
} else {
aCell = &cell
diffTableCell.LeftCell = cell
}
} else {
diffTableCell.Type = TableDiffCellAdd
}
var bCell *string
// get the bCell value if the bRow exists
if bRow != nil {
if cell, err := getCell(*bRow, bIndex); err != nil {
if err != ErrorUndefinedCell {
return nil, err
}
} else {
bCell = &cell
diffTableCell.RightCell = cell
}
} else {
diffTableCell.Type = TableDiffCellDel
}
// if both a and b have a cell that exists, compare the values and determine if the column has moved
if aCell != nil && bCell != nil {
moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded))
if *aCell != *bCell {
if moved {
diffTableCell.Type = TableDiffCellMovedChanged
} else {
diffTableCell.Type = TableDiffCellChanged
}
} else {
if moved {
diffTableCell.Type = TableDiffCellMovedUnchanged
} else {
diffTableCell.Type = TableDiffCellUnchanged
}
// identical values: only the right cell is kept
diffTableCell.LeftCell = ""
}
}
// Add the diff column to the diff row
diffTableCells[bIndex+colsDeleted] = &diffTableCell
bIndex++
}
}
return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil
}
// diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
var diffTableSections []*TableDiffSection
for i, section := range diffFile.Sections {
// Each section has multiple diffTableRows
var diffTableRows []*TableDiffRow
lines := tryMergeLines(section.Lines)
// Loop through the merged lines to get each row of the CSV diff table for this section
for j, line := range lines {
// If the very first diff line is not row 1 on both sides, emit row 1 first
// (presumably so the header row is always shown - TODO confirm intent)
if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
diffTableRow, err := createDiffTableRow(1, 1)
if err != nil {
return nil, err
}
if diffTableRow != nil {
diffTableRows = append(diffTableRows, diffTableRow)
}
}
diffTableRow, err := createDiffTableRow(line[0], line[1])
if err != nil {
return nil, err
}
if diffTableRow != nil {
diffTableRows = append(diffTableRows, diffTableRow)
}
}
if len(diffTableRows) > 0 {
diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows})
}
}
return diffTableSections, nil
}
// getColumnMapping builds the column mapping between the base and head file.
// The first returned slice maps each base column to its head column, the second
// maps each head column to its base column; unmappedColumn marks columns
// without a partner. Columns are paired by identical first-row (header) text
// first, then tryMapColumnsByContent is run in both directions for the rest.
func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) {
	// Errors are deliberately discarded here: a nil header row simply results
	// in an empty mapping for that side.
	baseHeader, _ := baseCSVReader.GetRow(0)
	headHeader, _ := headCSVReader.GetRow(0)

	base2Head := make([]int, len(baseHeader))
	head2Base := make([]int, len(headHeader))
	for headCol := range head2Base {
		head2Base[headCol] = unmappedColumn
	}

	for baseCol, baseName := range baseHeader {
		base2Head[baseCol] = unmappedColumn
		for headCol, headName := range headHeader {
			// Skip head columns that are already taken or carry another name.
			if head2Base[headCol] != unmappedColumn || baseName != headName {
				continue
			}
			base2Head[baseCol] = headCol
			head2Base[headCol] = baseCol
			break
		}
	}

	tryMapColumnsByContent(baseCSVReader, base2Head, headCSVReader, head2Base)
	tryMapColumnsByContent(headCSVReader, head2Base, baseCSVReader, base2Head)
	return base2Head, head2Base
}
// tryMapColumnsByContent pairs still-unmapped columns by comparing the buffered
// rows after the first one. A base column is mapped to the first unmapped head
// column for which the share of equal cells exceeds minRatioToMatch. Both
// mapping slices are updated in place.
func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) {
	for baseCol := range base2HeadColMap {
		if base2HeadColMap[baseCol] != unmappedColumn {
			continue
		}
		for headCol := range head2BaseColMap {
			if head2BaseColMap[headCol] != unmappedColumn {
				continue
			}
			// Number of rows to compare: everything buffered on both sides
			// except row 0, capped at maxRowsToInspect.
			rowCount := min(maxRowsToInspect, max(0, min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1))
			matches := 0
			for rowIdx := 1; rowIdx <= rowCount; rowIdx++ {
				baseCell, baseErr := getCell(baseCSVReader.buffer[rowIdx], baseCol)
				headCell, headErr := getCell(headCSVReader.buffer[rowIdx], headCol)
				if baseErr != nil || headErr != nil {
					continue
				}
				if baseCell == headCell {
					matches++
				}
			}
			if float32(matches)/float32(rowCount) > minRatioToMatch {
				base2HeadColMap[baseCol] = headCol
				head2BaseColMap[headCol] = baseCol
				break
			}
		}
	}
}
// getCell returns the cell at the zero-based column of row. If column lies
// outside the row it returns an empty string and ErrorUndefinedCell; this
// includes negative indexes such as the unmappedColumn marker, which
// previously caused an index-out-of-range panic.
func getCell(row []string, column int) (string, error) {
	if column < 0 || column >= len(row) {
		return "", ErrorUndefinedCell
	}
	return row[column], nil
}
// countUnmappedColumns reports how many entries of mapping are set to
// unmappedColumn, i.e. how many columns have no partner in the other file.
func countUnmappedColumns(mapping []int) int {
	unmapped := 0
	for _, target := range mapping {
		if target == unmappedColumn {
			unmapped++
		}
	}
	return unmapped
}
// tryMergeLines turns the lines of a git diff section into {left, right} line
// number pairs. Section header lines are skipped. A line that exists only on
// the right side (left index 0) is merged into the earliest entry of the
// trailing run of left-only pairs, so that a deletion followed by an addition
// shows up as one pair. The input is assumed to be ordered.
func tryMergeLines(lines []*DiffLine) [][2]int {
	merged := make([][2]int, 0, len(lines))
	for _, line := range lines {
		if line.Type == DiffLineSection {
			continue
		}
		pair := [2]int{line.LeftIdx, line.RightIdx}
		count := len(merged)
		if pair[0] == 0 && count > 0 && merged[count-1][1] == 0 {
			// Walk back to the first entry of the trailing run that still
			// lacks a right-hand line number and fill it in.
			first := count
			for first > 0 && merged[first-1][1] == 0 {
				first--
			}
			merged[first][1] = pair[1]
			continue
		}
		merged = append(merged, pair)
	}
	return merged
}

View File

@@ -0,0 +1,223 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"encoding/csv"
"strings"
"testing"
"code.gitea.io/gitea/models/db"
csv_module "code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/setting"
"github.com/stretchr/testify/assert"
)
// TestCSVDiff runs CreateCsvDiff over a table of patches. For every case the
// patch is parsed, CSV readers are created for the non-empty base/head
// contents, and the cell types of the single resulting section are compared
// with the expected matrix (one inner slice per row, one entry per column).
func TestCSVDiff(t *testing.T) {
cases := []struct {
diff string
base string
head string
cells [][]TableDiffCellType
}{
// case 0 - initial commit of a csv
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -0,0 +1,2 @@
+col1,col2
+a,a`,
base: "",
head: `col1,col2
a,a`,
cells: [][]TableDiffCellType{
{TableDiffCellAdd, TableDiffCellAdd},
{TableDiffCellAdd, TableDiffCellAdd},
},
},
// case 1 - adding 1 row at end
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +1,3 @@
col1,col2
-a,a
+a,a
+b,b`,
base: `col1,col2
a,a`,
head: `col1,col2
a,a
b,b`,
cells: [][]TableDiffCellType{
{TableDiffCellUnchanged, TableDiffCellUnchanged},
{TableDiffCellUnchanged, TableDiffCellUnchanged},
{TableDiffCellAdd, TableDiffCellAdd},
},
},
// case 2 - row deleted
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,3 +1,2 @@
col1,col2
-a,a
b,b`,
base: `col1,col2
a,a
b,b`,
head: `col1,col2
b,b`,
cells: [][]TableDiffCellType{
{TableDiffCellUnchanged, TableDiffCellUnchanged},
{TableDiffCellDel, TableDiffCellDel},
{TableDiffCellUnchanged, TableDiffCellUnchanged},
},
},
// case 3 - row changed
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +1,2 @@
col1,col2
-b,b
+b,c`,
base: `col1,col2
b,b`,
head: `col1,col2
b,c`,
cells: [][]TableDiffCellType{
{TableDiffCellUnchanged, TableDiffCellUnchanged},
{TableDiffCellUnchanged, TableDiffCellChanged},
},
},
// case 4 - all deleted
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +0,0 @@
-col1,col2
-b,c`,
base: `col1,col2
b,c`,
head: "",
cells: [][]TableDiffCellType{
{TableDiffCellDel, TableDiffCellDel},
{TableDiffCellDel, TableDiffCellDel},
},
},
// case 5 - renames first column
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,3 +1,3 @@
-col1,col2,col3
+cola,col2,col3
a,b,c`,
base: `col1,col2,col3
a,b,c`,
head: `cola,col2,col3
a,b,c`,
cells: [][]TableDiffCellType{
{TableDiffCellDel, TableDiffCellAdd, TableDiffCellUnchanged, TableDiffCellUnchanged},
{TableDiffCellDel, TableDiffCellAdd, TableDiffCellUnchanged, TableDiffCellUnchanged},
},
},
// case 6 - inserts a column after first, deletes last column
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +1,2 @@
-col1,col2,col3
-a,b,c
+col1,col1a,col2
+a,d,b`,
base: `col1,col2,col3
a,b,c`,
head: `col1,col1a,col2
a,d,b`,
cells: [][]TableDiffCellType{
{TableDiffCellUnchanged, TableDiffCellAdd, TableDiffCellDel, TableDiffCellMovedUnchanged},
{TableDiffCellUnchanged, TableDiffCellAdd, TableDiffCellDel, TableDiffCellMovedUnchanged},
},
},
// case 7 - deletes first column, inserts column after last
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +1,2 @@
-col1,col2,col3
-a,b,c
+col2,col3,col4
+b,c,d`,
base: `col1,col2,col3
a,b,c`,
head: `col2,col3,col4
b,c,d`,
cells: [][]TableDiffCellType{
{TableDiffCellDel, TableDiffCellUnchanged, TableDiffCellUnchanged, TableDiffCellAdd},
{TableDiffCellDel, TableDiffCellUnchanged, TableDiffCellUnchanged, TableDiffCellAdd},
},
},
// case 8 - two columns deleted, 2 added
{
diff: `diff --git a/unittest.csv b/unittest.csv
--- a/unittest.csv
+++ b/unittest.csv
@@ -1,2 +1,2 @@
-col1,col2,col
-a,b,c
+col3,col4,col5
+c,d,e`,
base: `col1,col2,col3
a,b,c`,
head: `col3,col4,col5
c,d,e`,
cells: [][]TableDiffCellType{
{TableDiffCellDel, TableDiffCellMovedUnchanged, TableDiffCellDel, TableDiffCellAdd, TableDiffCellAdd},
{TableDiffCellDel, TableDiffCellMovedUnchanged, TableDiffCellDel, TableDiffCellAdd, TableDiffCellAdd},
},
},
}
for n, c := range cases {
diff, err := ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff), "")
assert.NoError(t, err)
// A reader is only created for non-empty content; a nil reader tells
// CreateCsvDiff that the file does not exist on that side.
var baseReader *csv.Reader
if len(c.base) > 0 {
baseReader, err = csv_module.CreateReaderAndDetermineDelimiter(nil, strings.NewReader(c.base))
assert.NoError(t, err)
}
var headReader *csv.Reader
if len(c.head) > 0 {
headReader, err = csv_module.CreateReaderAndDetermineDelimiter(nil, strings.NewReader(c.head))
assert.NoError(t, err)
}
result, err := CreateCsvDiff(diff.Files[0], baseReader, headReader)
assert.NoError(t, err)
assert.Len(t, result, 1, "case %d: should be one section", n)
section := result[0]
assert.Len(t, section.Rows, len(c.cells), "case %d: should be %d rows", n, len(c.cells))
// Compare every produced cell type with the expected matrix.
for i, row := range section.Rows {
assert.Len(t, row.Cells, len(c.cells[i]), "case %d: row %d should have %d cells", n, i, len(c.cells[i]))
for j, cell := range row.Cells {
assert.Equal(t, c.cells[i][j], cell.Type, "case %d: row %d cell %d should be equal", n, i, j)
}
}
}
}

View File

@@ -0,0 +1,250 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"bufio"
"context"
"errors"
"fmt"
"io"
"strconv"
"strings"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
)
// DiffTree is the result of GetDiffTree: the list of file-level changes
// between two commits.
type DiffTree struct {
// Files holds one record per changed path, in the order the records were parsed.
Files []*DiffTreeRecord
}
// DiffTreeRecord describes a single changed path, parsed from one line of
// `git diff-tree --raw` output.
type DiffTreeRecord struct {
// Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied', 'typechanged', 'unmerged', 'unknown'
Status string
// For renames and copies, the percentage of similarity between the source and target of the move/rename.
// It is 0 for every other status.
Score uint8
// HeadPath and BasePath are the path on the head and base side. They differ
// only for renames, where BasePath is the old and HeadPath the new path.
HeadPath string
BasePath string
// HeadMode and BaseMode are the entry modes parsed from the new/old mode columns.
HeadMode git.EntryMode
BaseMode git.EntryMode
// HeadBlobID and BaseBlobID are the object IDs from the new/old sha columns, kept as printed by git.
HeadBlobID string
BaseBlobID string
}
// GetDiffTree returns the files that changed between baseSha and headSha.
// With useMergeBase set, the diff is taken against the merge base of the two
// commits, which matches the behavior of a three-dot diff in git diff.
func GetDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (*DiffTree, error) {
	records, err := runGitDiffTree(ctx, gitRepo, useMergeBase, baseSha, headSha)
	if err != nil {
		return nil, err
	}
	tree := new(DiffTree)
	tree.Files = records
	return tree, nil
}
// runGitDiffTree validates and resolves the commit arguments, runs
// `git diff-tree --raw -r --find-renames --root` in the repository (adding
// --merge-base when requested) and parses the output into records.
func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) ([]*DiffTreeRecord, error) {
	withMergeBase, baseID, headID, err := validateGitDiffTreeArguments(gitRepo, useMergeBase, baseSha, headSha)
	if err != nil {
		return nil, err
	}

	cmd := git.NewCommand("diff-tree", "--raw", "-r", "--find-renames", "--root")
	if withMergeBase {
		cmd.AddArguments("--merge-base")
	}
	cmd.AddDynamicArguments(baseID, headID)

	output, _, cmdErr := cmd.RunStdString(ctx, &git.RunOpts{Dir: gitRepo.Path})
	if cmdErr != nil {
		log.Warn("git diff-tree: %v", cmdErr)
		return nil, cmdErr
	}
	return parseGitDiffTree(strings.NewReader(output))
}
// validateGitDiffTreeArguments resolves the commits to diff and decides
// whether --merge-base may be used.
//
// headSha must be non-empty and resolve to a commit. An explicit baseSha must
// resolve as well. With an empty baseSha the first parent of the head commit
// becomes the base; if the head commit has no parent, the empty tree of the
// repository's object format is used and merge-base is switched off.
func validateGitDiffTreeArguments(gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (shouldUseMergeBase bool, resolvedBaseSha, resolvedHeadSha string, err error) {
	// an empty head is an error
	if headSha == "" {
		return false, "", "", errors.New("headSha is empty")
	}

	// a head commit that cannot be found is an error
	headCommit, err := gitRepo.GetCommit(headSha)
	if err != nil {
		return false, "", "", fmt.Errorf("failed to get commit headSha: %v", err)
	}
	headID := headCommit.ID.String()

	// an explicit base only needs to be resolved
	if baseSha != "" {
		baseCommit, err := gitRepo.GetCommit(baseSha)
		if err != nil {
			return useMergeBase, "", "", fmt.Errorf("failed to get base commit %s: %v", baseSha, err)
		}
		return useMergeBase, baseCommit.ID.String(), headID, nil
	}

	// no base given and no parent either: diff against the empty tree.
	// This can happen when generating a diff against an orphaned commit.
	if headCommit.ParentCount() == 0 {
		objectFormat, err := gitRepo.GetObjectFormat()
		if err != nil {
			return false, "", "", err
		}
		// merge base is disabled because there is no base commit
		return false, objectFormat.EmptyTree().String(), headID, nil
	}

	// no base given: fall back to the first parent of the head commit
	parent, err := headCommit.Parent(0)
	if err != nil {
		return false, "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %v", headID, err)
	}
	return useMergeBase, parent.ID.String(), headID, nil
}
// parseGitDiffTree reads raw `git diff-tree` output line by line and converts
// every non-empty line into a DiffTreeRecord. The first unparsable line aborts
// with its error. The result is never nil on success, even without records.
func parseGitDiffTree(gitOutput io.Reader) ([]*DiffTreeRecord, error) {
	/*
		The output of `git diff-tree --raw -r --find-renames` is of the form:
		:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>
		or for renames:
		:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<old_path>\t<new_path>
		See: <https://git-scm.com/docs/git-diff-tree#_raw_output_format> for more details
	*/
	records := []*DiffTreeRecord{}
	scanner := bufio.NewScanner(gitOutput)
	for scanner.Scan() {
		text := scanner.Text()
		if text == "" {
			continue
		}
		record, err := parseGitDiffTreeLine(text)
		if err != nil {
			return nil, err
		}
		records = append(records, record)
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}
	return records, nil
}
// parseGitDiffTreeLine parses one line of `git diff-tree --raw` output:
//
//	:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>[\t<new_path>]
//
// Rename and copy records carry two paths (source, then destination); for
// those BasePath is the source and HeadPath the destination. Every other
// status carries one path, which is used for both sides. An error is returned
// when the line does not have the expected shape or a field cannot be parsed.
func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) {
	line = strings.TrimPrefix(line, ":")
	// Everything before the first tab is metadata, everything after it is paths.
	splitSections := strings.SplitN(line, "\t", 2)
	if len(splitSections) < 2 {
		return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`)", line)
	}

	fields := strings.Fields(splitSections[0])
	if len(fields) < 5 {
		return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`, expected 5 space delimited values got %d)", line, len(fields))
	}

	baseMode, err := git.ParseEntryMode(fields[0])
	if err != nil {
		return nil, err
	}

	headMode, err := git.ParseEntryMode(fields[1])
	if err != nil {
		return nil, err
	}

	baseBlobID := fields[2]
	headBlobID := fields[3]

	status, score, err := statusFromLetter(fields[4])
	if err != nil {
		return nil, fmt.Errorf("unparsable output for diff-tree --raw: %s, error: %w", line, err)
	}

	filePaths := strings.Split(splitSections[1], "\t")

	var headPath, basePath string
	switch status {
	case "renamed", "copied":
		// git prints "<src>\t<dst>" for both renames and copies.
		if len(filePaths) != 2 {
			return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`, expected 2 paths found %d", line, len(filePaths))
		}
		basePath, headPath = filePaths[0], filePaths[1]
	default:
		basePath, headPath = filePaths[0], filePaths[0]
	}

	return &DiffTreeRecord{
		Status:     status,
		Score:      score,
		BaseMode:   baseMode,
		HeadMode:   headMode,
		BaseBlobID: baseBlobID,
		HeadBlobID: headBlobID,
		BasePath:   basePath,
		HeadPath:   headPath,
	}, nil
}
// statusFromLetter translates the status column of `git diff-tree --raw`
// (a letter, optionally followed by a similarity score) into a status name.
// Only renames ('R') and copies ('C') carry a score; for every other known
// letter the score is 0. An empty or unknown status yields an error.
func statusFromLetter(rawStatus string) (status string, score uint8, err error) {
	if rawStatus == "" {
		return "", 0, errors.New("empty status letter")
	}

	var name string
	switch letter := rawStatus[0]; letter {
	case 'R', 'C':
		name = "renamed"
		if letter == 'C' {
			name = "copied"
		}
		// The status name is returned even when the score cannot be parsed.
		score, err = tryParseStatusScore(rawStatus)
		return name, score, err
	case 'A':
		name = "added"
	case 'D':
		name = "deleted"
	case 'M':
		name = "modified"
	case 'T':
		name = "typechanged"
	case 'U':
		name = "unmerged"
	case 'X':
		name = "unknown"
	default:
		return "", 0, fmt.Errorf("unknown status letter: '%s'", rawStatus)
	}
	return name, 0, nil
}
// tryParseStatusScore extracts the similarity score that follows the status
// letter, e.g. 87 from "R087". It fails when nothing follows the letter, when
// the remainder is not an unsigned 8-bit decimal number, or when the value is
// greater than 100.
func tryParseStatusScore(rawStatus string) (uint8, error) {
	if len(rawStatus) < 2 {
		return 0, errors.New("status score missing")
	}

	parsed, err := strconv.ParseUint(rawStatus[1:], 10, 8)
	if err != nil {
		return 0, fmt.Errorf("failed to parse status score: %w", err)
	}
	if parsed > 100 {
		return 0, fmt.Errorf("status score out of range: %d", parsed)
	}
	return uint8(parsed), nil
}

View File

@@ -0,0 +1,427 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"strings"
"testing"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/git"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestGitDiffTree runs GetDiffTree against the repo5_pulls fixture repository
// and compares the returned records with the expected ones for several
// base/head/merge-base combinations.
func TestGitDiffTree(t *testing.T) {
	test := []struct {
		Name         string
		RepoPath     string
		BaseSha      string
		HeadSha      string
		useMergeBase bool
		Expected     *DiffTree
	}{
		{
			Name:     "happy path",
			RepoPath: "../../modules/git/tests/repos/repo5_pulls",
			BaseSha:  "72866af952e98d02a73003501836074b286a78f6",
			HeadSha:  "d8e0bbb45f200e67d9a784ce55bd90821af45ebd",
			Expected: &DiffTree{
				Files: []*DiffTreeRecord{
					{
						Status:     "modified",
						HeadPath:   "LICENSE",
						BasePath:   "LICENSE",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeBlob,
						HeadBlobID: "ee469963e76ae1bb7ee83d7510df2864e6c8c640",
						BaseBlobID: "c996f4725be8fc8c1d1c776e58c97ddc5d03b336",
					},
					{
						Status:     "modified",
						HeadPath:   "README.md",
						BasePath:   "README.md",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeBlob,
						HeadBlobID: "9dfc0a6257d8eff526f0cfaf6a8ea950f55a9dba",
						BaseBlobID: "074e590b8e64898b02beef03ece83f962c94f54c",
					},
				},
			},
		},
		{
			Name:     "first commit (no parent)",
			RepoPath: "../../modules/git/tests/repos/repo5_pulls",
			HeadSha:  "72866af952e98d02a73003501836074b286a78f6",
			Expected: &DiffTree{
				Files: []*DiffTreeRecord{
					{
						Status:     "added",
						HeadPath:   ".gitignore",
						BasePath:   ".gitignore",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeNoEntry,
						HeadBlobID: "f1c181ec9c5c921245027c6b452ecfc1d3626364",
						BaseBlobID: "0000000000000000000000000000000000000000",
					},
					{
						Status:     "added",
						HeadPath:   "LICENSE",
						BasePath:   "LICENSE",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeNoEntry,
						HeadBlobID: "c996f4725be8fc8c1d1c776e58c97ddc5d03b336",
						BaseBlobID: "0000000000000000000000000000000000000000",
					},
					{
						Status:     "added",
						HeadPath:   "README.md",
						BasePath:   "README.md",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeNoEntry,
						HeadBlobID: "074e590b8e64898b02beef03ece83f962c94f54c",
						BaseBlobID: "0000000000000000000000000000000000000000",
					},
				},
			},
		},
		{
			Name:         "first commit (no parent), merge base = true",
			RepoPath:     "../../modules/git/tests/repos/repo5_pulls",
			HeadSha:      "72866af952e98d02a73003501836074b286a78f6",
			useMergeBase: true,
			Expected: &DiffTree{
				Files: []*DiffTreeRecord{
					{
						Status:     "added",
						HeadPath:   ".gitignore",
						BasePath:   ".gitignore",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeNoEntry,
						HeadBlobID: "f1c181ec9c5c921245027c6b452ecfc1d3626364",
						BaseBlobID: "0000000000000000000000000000000000000000",
					},
					{
						Status:     "added",
						HeadPath:   "LICENSE",
						BasePath:   "LICENSE",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeNoEntry,
						HeadBlobID: "c996f4725be8fc8c1d1c776e58c97ddc5d03b336",
						BaseBlobID: "0000000000000000000000000000000000000000",
					},
					{
						Status:     "added",
						HeadPath:   "README.md",
						BasePath:   "README.md",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeNoEntry,
						HeadBlobID: "074e590b8e64898b02beef03ece83f962c94f54c",
						BaseBlobID: "0000000000000000000000000000000000000000",
					},
				},
			},
		},
		{
			Name:     "base and head same",
			RepoPath: "../../modules/git/tests/repos/repo5_pulls",
			BaseSha:  "ed8f4d2fa5b2420706580d191f5dd50c4e491f3f",
			HeadSha:  "ed8f4d2fa5b2420706580d191f5dd50c4e491f3f",
			Expected: &DiffTree{
				Files: []*DiffTreeRecord{},
			},
		},
		{
			Name:         "useMergeBase false",
			RepoPath:     "../../modules/git/tests/repos/repo5_pulls",
			BaseSha:      "ed8f4d2fa5b2420706580d191f5dd50c4e491f3f",
			HeadSha:      "111cac04bd7d20301964e27a93698aabb5781b80", // this commit can be found on the update-readme branch
			useMergeBase: false,
			Expected: &DiffTree{
				Files: []*DiffTreeRecord{
					{
						Status:     "modified",
						HeadPath:   "LICENSE",
						BasePath:   "LICENSE",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeBlob,
						HeadBlobID: "c996f4725be8fc8c1d1c776e58c97ddc5d03b336",
						BaseBlobID: "ed5119b3c1f45547b6785bc03eac7f87570fa17f",
					},
					{
						Status:     "modified",
						HeadPath:   "README.md",
						BasePath:   "README.md",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeBlob,
						HeadBlobID: "fb39771a8865c9a67f2ab9b616c854805664553c",
						BaseBlobID: "9dfc0a6257d8eff526f0cfaf6a8ea950f55a9dba",
					},
				},
			},
		},
		{
			Name:         "useMergeBase true",
			RepoPath:     "../../modules/git/tests/repos/repo5_pulls",
			BaseSha:      "ed8f4d2fa5b2420706580d191f5dd50c4e491f3f",
			HeadSha:      "111cac04bd7d20301964e27a93698aabb5781b80", // this commit can be found on the update-readme branch
			useMergeBase: true,
			Expected: &DiffTree{
				Files: []*DiffTreeRecord{
					{
						Status:     "modified",
						HeadPath:   "README.md",
						BasePath:   "README.md",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeBlob,
						HeadBlobID: "fb39771a8865c9a67f2ab9b616c854805664553c",
						BaseBlobID: "9dfc0a6257d8eff526f0cfaf6a8ea950f55a9dba",
					},
				},
			},
		},
		{
			Name:         "no base set",
			RepoPath:     "../../modules/git/tests/repos/repo5_pulls",
			HeadSha:      "d8e0bbb45f200e67d9a784ce55bd90821af45ebd", // this commit can be found on the update-readme branch
			useMergeBase: false,
			Expected: &DiffTree{
				Files: []*DiffTreeRecord{
					{
						Status:     "modified",
						HeadPath:   "LICENSE",
						BasePath:   "LICENSE",
						HeadMode:   git.EntryModeBlob,
						BaseMode:   git.EntryModeBlob,
						HeadBlobID: "ee469963e76ae1bb7ee83d7510df2864e6c8c640",
						BaseBlobID: "ed5119b3c1f45547b6785bc03eac7f87570fa17f",
					},
				},
			},
		},
	}

	for _, tt := range test {
		t.Run(tt.Name, func(t *testing.T) {
			gitRepo, err := git.OpenRepository(git.DefaultContext, tt.RepoPath)
			// Stop right away when the fixture repository cannot be opened:
			// continuing would call Close and GetDiffTree on a nil repository.
			require.NoError(t, err)
			defer gitRepo.Close()

			diffPaths, err := GetDiffTree(db.DefaultContext, gitRepo, tt.useMergeBase, tt.BaseSha, tt.HeadSha)
			require.NoError(t, err)

			assert.Equal(t, tt.Expected, diffPaths)
		})
	}
}
// TestParseGitDiffTree feeds hand-written raw diff-tree lines to parseGitDiffTree
// and compares the returned records with the expected DiffTreeRecord values.
//
// Each input line has the shape
// ":<base mode> <head mode> <base blob id> <head blob id> <status>\t<path>[\t<path>]";
// the expectations below show how every field is mapped onto a record.
func TestParseGitDiffTree(t *testing.T) {
test := []struct {
Name string
GitOutput string
Expected []*DiffTreeRecord
}{
{
// status "M": both sides are regular blobs and share the same path
Name: "file change",
GitOutput: ":100644 100644 64e43d23bcd08db12563a0a4d84309cadb437e1a 5dbc7792b5bb228647cfcc8dfe65fc649119dedc M\tResources/views/curriculum/edit.blade.php",
Expected: []*DiffTreeRecord{
{
Status: "modified",
HeadPath: "Resources/views/curriculum/edit.blade.php",
BasePath: "Resources/views/curriculum/edit.blade.php",
HeadMode: git.EntryModeBlob,
BaseMode: git.EntryModeBlob,
HeadBlobID: "5dbc7792b5bb228647cfcc8dfe65fc649119dedc",
BaseBlobID: "64e43d23bcd08db12563a0a4d84309cadb437e1a",
},
},
},
{
// status "A": base mode 000000 maps to EntryModeNoEntry and the base blob id is all zeros
Name: "file added",
GitOutput: ":000000 100644 0000000000000000000000000000000000000000 0063162fb403db15ceb0517b34ab782e4e58b619 A\tResources/views/class/index.blade.php",
Expected: []*DiffTreeRecord{
{
Status: "added",
HeadPath: "Resources/views/class/index.blade.php",
BasePath: "Resources/views/class/index.blade.php",
HeadMode: git.EntryModeBlob,
BaseMode: git.EntryModeNoEntry,
HeadBlobID: "0063162fb403db15ceb0517b34ab782e4e58b619",
BaseBlobID: "0000000000000000000000000000000000000000",
},
},
},
{
// status "D": the mirror image of an addition, the head side has no entry
Name: "file deleted",
GitOutput: ":100644 000000 bac4286303c8c0017ea2f0a48c561ddcc0330a14 0000000000000000000000000000000000000000 D\tResources/views/classes/index.blade.php",
Expected: []*DiffTreeRecord{
{
Status: "deleted",
HeadPath: "Resources/views/classes/index.blade.php",
BasePath: "Resources/views/classes/index.blade.php",
HeadMode: git.EntryModeNoEntry,
BaseMode: git.EntryModeBlob,
HeadBlobID: "0000000000000000000000000000000000000000",
BaseBlobID: "bac4286303c8c0017ea2f0a48c561ddcc0330a14",
},
},
},
{
// status "R087": the digits after "R" become Score, the first path is the base path and the second the head path
Name: "file renamed",
GitOutput: ":100644 100644 c8a055cfb45cd39747292983ad1797ceab40f5b1 97248f79a90aaf81fe7fd74b33c1cb182dd41783 R087\tDatabase/Seeders/AdminDatabaseSeeder.php\tDatabase/Seeders/AcademicDatabaseSeeder.php",
Expected: []*DiffTreeRecord{
{
Status: "renamed",
Score: 87,
HeadPath: "Database/Seeders/AcademicDatabaseSeeder.php",
BasePath: "Database/Seeders/AdminDatabaseSeeder.php",
HeadMode: git.EntryModeBlob,
BaseMode: git.EntryModeBlob,
HeadBlobID: "97248f79a90aaf81fe7fd74b33c1cb182dd41783",
BaseBlobID: "c8a055cfb45cd39747292983ad1797ceab40f5b1",
},
},
},
{
// empty input must give an empty, non-nil slice (assert.Equal distinguishes nil from empty)
Name: "no changes",
GitOutput: ``,
Expected: []*DiffTreeRecord{},
},
{
// several newline-terminated lines produce one record per line, in input order
Name: "multiple changes",
GitOutput: ":000000 100644 0000000000000000000000000000000000000000 db736b44533a840981f1f17b7029d0f612b69550 A\tHttp/Controllers/ClassController.php\n" +
":100644 000000 9a4d2344d4d0145db7c91b3f3e123c74367d4ef4 0000000000000000000000000000000000000000 D\tHttp/Controllers/ClassesController.php\n" +
":100644 100644 f060d6aede65d423f49e7dc248dfa0d8835ef920 b82c8e39a3602dedadb44669956d6eb5b6a7cc86 M\tHttp/Controllers/ProgramDirectorController.php\n",
Expected: []*DiffTreeRecord{
{
Status: "added",
HeadPath: "Http/Controllers/ClassController.php",
BasePath: "Http/Controllers/ClassController.php",
HeadMode: git.EntryModeBlob,
BaseMode: git.EntryModeNoEntry,
HeadBlobID: "db736b44533a840981f1f17b7029d0f612b69550",
BaseBlobID: "0000000000000000000000000000000000000000",
},
{
Status: "deleted",
HeadPath: "Http/Controllers/ClassesController.php",
BasePath: "Http/Controllers/ClassesController.php",
HeadMode: git.EntryModeNoEntry,
BaseMode: git.EntryModeBlob,
HeadBlobID: "0000000000000000000000000000000000000000",
BaseBlobID: "9a4d2344d4d0145db7c91b3f3e123c74367d4ef4",
},
{
Status: "modified",
HeadPath: "Http/Controllers/ProgramDirectorController.php",
BasePath: "Http/Controllers/ProgramDirectorController.php",
HeadMode: git.EntryModeBlob,
BaseMode: git.EntryModeBlob,
HeadBlobID: "b82c8e39a3602dedadb44669956d6eb5b6a7cc86",
BaseBlobID: "f060d6aede65d423f49e7dc248dfa0d8835ef920",
},
},
},
{
// paths are separated by tabs only, so spaces inside a path must be preserved
Name: "spaces in file path",
GitOutput: ":000000 100644 0000000000000000000000000000000000000000 db736b44533a840981f1f17b7029d0f612b69550 A\tHttp /Controllers/Class Controller.php\n" +
":100644 000000 9a4d2344d4d0145db7c91b3f3e123c74367d4ef4 0000000000000000000000000000000000000000 D\tHttp/Cont rollers/Classes Controller.php\n" +
":100644 100644 f060d6aede65d423f49e7dc248dfa0d8835ef920 b82c8e39a3602dedadb44669956d6eb5b6a7cc86 R010\tHttp/Controllers/Program Director Controller.php\tHttp/Cont rollers/ProgramDirectorController.php\n",
Expected: []*DiffTreeRecord{
{
Status: "added",
HeadPath: "Http /Controllers/Class Controller.php",
BasePath: "Http /Controllers/Class Controller.php",
HeadMode: git.EntryModeBlob,
BaseMode: git.EntryModeNoEntry,
HeadBlobID: "db736b44533a840981f1f17b7029d0f612b69550",
BaseBlobID: "0000000000000000000000000000000000000000",
},
{
Status: "deleted",
HeadPath: "Http/Cont rollers/Classes Controller.php",
BasePath: "Http/Cont rollers/Classes Controller.php",
HeadMode: git.EntryModeNoEntry,
BaseMode: git.EntryModeBlob,
HeadBlobID: "0000000000000000000000000000000000000000",
BaseBlobID: "9a4d2344d4d0145db7c91b3f3e123c74367d4ef4",
},
{
Status: "renamed",
Score: 10,
HeadPath: "Http/Cont rollers/ProgramDirectorController.php",
BasePath: "Http/Controllers/Program Director Controller.php",
HeadMode: git.EntryModeBlob,
BaseMode: git.EntryModeBlob,
HeadBlobID: "b82c8e39a3602dedadb44669956d6eb5b6a7cc86",
BaseBlobID: "f060d6aede65d423f49e7dc248dfa0d8835ef920",
},
},
},
{
// status "T": head mode 120000 maps to EntryModeSymlink while the base stays a blob
Name: "file type changed",
GitOutput: ":100644 120000 344e0ca8aa791cc4164fb0ea645f334fd40d00f0 a7c2973de00bfdc6ca51d315f401b5199fe01dc3 T\twebpack.mix.js",
Expected: []*DiffTreeRecord{
{
Status: "typechanged",
HeadPath: "webpack.mix.js",
BasePath: "webpack.mix.js",
HeadMode: git.EntryModeSymlink,
BaseMode: git.EntryModeBlob,
HeadBlobID: "a7c2973de00bfdc6ca51d315f401b5199fe01dc3",
BaseBlobID: "344e0ca8aa791cc4164fb0ea645f334fd40d00f0",
},
},
},
}
// run every case as its own subtest; none of the inputs is expected to produce an error
for _, tt := range test {
t.Run(tt.Name, func(t *testing.T) {
entries, err := parseGitDiffTree(strings.NewReader(tt.GitOutput))
assert.NoError(t, err)
assert.Equal(t, tt.Expected, entries)
})
}
}
// TestGitDiffTreeErrors checks that GetDiffTree (called with useMergeBase set)
// returns an error and a nil diff tree when the head or base revision does not
// exist, or when no head revision is given at all.
func TestGitDiffTreeErrors(t *testing.T) {
	test := []struct {
		Name     string
		RepoPath string
		BaseSha  string
		HeadSha  string
	}{
		{
			Name:     "head doesn't exist",
			RepoPath: "../../modules/git/tests/repos/repo5_pulls",
			BaseSha:  "f32b0a9dfd09a60f616f29158f772cedd89942d2",
			HeadSha:  "asdfasdfasdf",
		},
		{
			Name:     "base doesn't exist",
			RepoPath: "../../modules/git/tests/repos/repo5_pulls",
			BaseSha:  "asdfasdfasdf",
			HeadSha:  "f32b0a9dfd09a60f616f29158f772cedd89942d2",
		},
		{
			// HeadSha is deliberately left at its zero value
			Name:     "head not set",
			RepoPath: "../../modules/git/tests/repos/repo5_pulls",
			BaseSha:  "f32b0a9dfd09a60f616f29158f772cedd89942d2",
		},
	}
	for _, tt := range test {
		t.Run(tt.Name, func(t *testing.T) {
			gitRepo, err := git.OpenRepository(git.DefaultContext, tt.RepoPath)
			// Stop the subtest right here when the fixture repository cannot be opened:
			// continuing would hand a nil repository to GetDiffTree, and the error
			// asserted below would then come from the broken setup instead of the bad SHA.
			require.NoError(t, err)
			defer gitRepo.Close()

			diffPaths, err := GetDiffTree(db.DefaultContext, gitRepo, true, tt.BaseSha, tt.HeadSha)
			assert.Error(t, err)
			assert.Nil(t, diffPaths)
		})
	}
}

1406
services/gitdiff/gitdiff.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,642 @@
// Copyright 2014 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"strconv"
"strings"
"testing"
"code.gitea.io/gitea/models/db"
issues_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/setting"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestParsePatch_skipTo runs ParsePatch over patches that contain one or more
// file diffs while passing a file name as the last ("skipTo") argument.
// Every case expects exactly one file in the result, carrying the expected
// names and addition/deletion counts.
func TestParsePatch_skipTo(t *testing.T) {
type testcase struct {
name string
gitdiff string
wantErr bool
addition int
deletion int
oldFilename string
filename string
skipTo string
}
// NOTE(review): the names "readme.md2readme.md" and "A \\ B" are each used by two
// cases; t.Run keeps subtest names unique by appending a sequence suffix.
tests := []testcase{
{
// the wanted file is the second of two diffs
name: "readme.md2readme.md",
gitdiff: `diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
+++ "b/A \\ B"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off
diff --git "\\a/README.md" "\\b/README.md"
--- "\\a/README.md"
+++ "\\b/README.md"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off
`,
addition: 4,
deletion: 1,
filename: "README.md",
oldFilename: "README.md",
skipTo: "README.md",
},
{
// the patch holds only the wanted file
name: "A \\ B",
gitdiff: `diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
+++ "b/A \\ B"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off`,
addition: 4,
deletion: 1,
filename: "A \\ B",
oldFilename: "A \\ B",
skipTo: "A \\ B",
},
{
// the wanted file follows a diff for another file
name: "A \\ B",
gitdiff: `diff --git "\\a/README.md" "\\b/README.md"
--- "\\a/README.md"
+++ "\\b/README.md"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off
diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
+++ "b/A \\ B"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off`,
addition: 4,
deletion: 1,
filename: "A \\ B",
oldFilename: "A \\ B",
skipTo: "A \\ B",
},
{
// the wanted file is the third diff, after two diffs of the same other file
name: "readme.md2readme.md",
gitdiff: `diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
+++ "b/A \\ B"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off
diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
+++ "b/A \\ B"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off
diff --git "\\a/README.md" "\\b/README.md"
--- "\\a/README.md"
+++ "\\b/README.md"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off
`,
addition: 4,
deletion: 1,
filename: "README.md",
oldFilename: "README.md",
skipTo: "README.md",
},
}
for _, testcase := range tests {
t.Run(testcase.name, func(t *testing.T) {
got, err := ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), testcase.skipTo)
// fail when an error shows up unexpectedly or an expected one is missing
if (err != nil) != testcase.wantErr {
t.Errorf("ParsePatch(%q) error = %v, wantErr %v", testcase.name, err, testcase.wantErr)
return
}
// the JSON dump is only used for the failure message below, so its error is ignored
gotMarshaled, _ := json.MarshalIndent(got, "", " ")
// NOTE(review): the messages below say "ParsePath" although the function under test is ParsePatch
if len(got.Files) != 1 {
t.Errorf("ParsePath(%q) did not receive 1 file:\n%s", testcase.name, string(gotMarshaled))
return
}
file := got.Files[0]
if file.Addition != testcase.addition {
t.Errorf("ParsePath(%q) does not have correct file addition %d, wanted %d", testcase.name, file.Addition, testcase.addition)
}
if file.Deletion != testcase.deletion {
t.Errorf("ParsePath(%q) did not have correct file deletion %d, wanted %d", testcase.name, file.Deletion, testcase.deletion)
}
if file.OldName != testcase.oldFilename {
t.Errorf("ParsePath(%q) did not have correct OldName %q, wanted %q", testcase.name, file.OldName, testcase.oldFilename)
}
if file.Name != testcase.filename {
t.Errorf("ParsePath(%q) did not have correct Name %q, wanted %q", testcase.name, file.Name, testcase.filename)
}
})
}
}
// TestParsePatch_singlefile runs ParsePatch (with an empty skipTo argument) over
// patches that each describe a single file and checks the parsed old/new names
// and the addition/deletion counts.
// After the table it also checks the IsIncomplete flag under different line and
// line-length limits, and that a few differently quoted headers parse without error.
func TestParsePatch_singlefile(t *testing.T) {
type testcase struct {
name string
gitdiff string
wantErr bool
addition int
deletion int
oldFilename string
filename string
}
tests := []testcase{
{
name: "readme.md2readme.md",
gitdiff: `diff --git "\\a/README.md" "\\b/README.md"
--- "\\a/README.md"
+++ "\\b/README.md"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off
`,
addition: 4,
deletion: 1,
filename: "README.md",
oldFilename: "README.md",
},
{
name: "A \\ B",
gitdiff: `diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
+++ "b/A \\ B"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off`,
addition: 4,
deletion: 1,
filename: "A \\ B",
oldFilename: "A \\ B",
},
{
// the file name itself contains " b/" sequences
name: "really weird filename",
gitdiff: `diff --git "\\a/a b/file b/a a/file" "\\b/a b/file b/a a/file"
index d2186f1..f5c8ed2 100644
--- "\\a/a b/file b/a a/file" ` + `
+++ "\\b/a b/file b/a a/file" ` + `
@@ -1,3 +1,2 @@
Create a weird file.
` + `
-and what does diff do here?
\ No newline at end of file`,
addition: 0,
deletion: 1,
filename: "a b/file b/a a/file",
oldFilename: "a b/file b/a a/file",
},
{
// deletion: the new side is /dev/null, yet both names are expected to be the old file name
name: "delete file with blanks",
gitdiff: `diff --git "\\a/file with blanks" "\\b/file with blanks"
deleted file mode 100644
index 898651a..0000000
--- "\\a/file with blanks" ` + `
+++ /dev/null
@@ -1,5 +0,0 @@
-a blank file
-
-has a couple o line
-
-the 5th line is the last
`,
addition: 0,
deletion: 5,
filename: "file with blanks",
oldFilename: "file with blanks",
},
{
// octal-escaped names in the header are expected to be decoded to their UTF-8 text
name: "rename a—as",
gitdiff: `diff --git "a/\360\243\220\265b\342\200\240vs" "b/a\342\200\224as"
similarity index 100%
rename from "\360\243\220\265b\342\200\240vs"
rename to "a\342\200\224as"
`,
addition: 0,
deletion: 0,
oldFilename: "𣐵b†vs",
filename: "a—as",
},
{
name: "rename with spaces",
gitdiff: `diff --git "\\a/a b/file b/a a/file" "\\b/a b/a a/file b/b file"
similarity index 100%
rename from a b/file b/a a/file
rename to a b/a a/file b/b file
`,
oldFilename: "a b/file b/a a/file",
filename: "a b/a a/file b/b file",
},
{
// unquoted header "a/b b/b b/b b/b" can be split in more than one place
name: "ambiguous deleted",
gitdiff: `diff --git a/b b/b b/b b/b
deleted file mode 100644
index 92e798b..0000000
--- a/b b/b` + "\t" + `
+++ /dev/null
@@ -1 +0,0 @@
-b b/b
`,
oldFilename: "b b/b",
filename: "b b/b",
addition: 0,
deletion: 1,
},
{
name: "ambiguous addition",
gitdiff: `diff --git a/b b/b b/b b/b
new file mode 100644
index 0000000..92e798b
--- /dev/null
+++ b/b b/b` + "\t" + `
@@ -0,0 +1 @@
+b b/b
`,
oldFilename: "b b/b",
filename: "b b/b",
addition: 1,
deletion: 0,
},
{
// the expected names are the ones on the "rename from" / "rename to" lines
name: "rename",
gitdiff: `diff --git a/b b/b b/b b/b b/b b/b
similarity index 100%
rename from b b/b b/b b/b b/b
rename to b
`,
oldFilename: "b b/b b/b b/b b/b",
filename: "b",
},
{
// NOTE(review): same input and expectations as the "rename" case above
name: "ambiguous 1",
gitdiff: `diff --git a/b b/b b/b b/b b/b b/b
similarity index 100%
rename from b b/b b/b b/b b/b
rename to b
`,
oldFilename: "b b/b b/b b/b b/b",
filename: "b",
},
{
// same header line as "ambiguous 1", but different rename from/to lines
name: "ambiguous 2",
gitdiff: `diff --git a/b b/b b/b b/b b/b b/b
similarity index 100%
rename from b b/b b/b b/b
rename to b b/b
`,
oldFilename: "b b/b b/b b/b",
filename: "b b/b",
},
{
// content lines that begin with "--" / "++" must still be counted as deletions / additions
name: "minuses-and-pluses",
gitdiff: `diff --git a/minuses-and-pluses b/minuses-and-pluses
index 6961180..9ba1a00 100644
--- a/minuses-and-pluses
+++ b/minuses-and-pluses
@@ -1,4 +1,4 @@
--- 1st line
-++ 2nd line
--- 3rd line
-++ 4th line
+++ 1st line
+-- 2nd line
+++ 3rd line
+-- 4th line
`,
oldFilename: "minuses-and-pluses",
filename: "minuses-and-pluses",
addition: 4,
deletion: 4,
},
}
for _, testcase := range tests {
t.Run(testcase.name, func(t *testing.T) {
got, err := ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), "")
// fail when an error shows up unexpectedly or an expected one is missing
if (err != nil) != testcase.wantErr {
t.Errorf("ParsePatch(%q) error = %v, wantErr %v", testcase.name, err, testcase.wantErr)
return
}
// the JSON dump is only used for the failure message below, so its error is ignored
gotMarshaled, _ := json.MarshalIndent(got, "", " ")
// NOTE(review): the messages below say "ParsePath" although the function under test is ParsePatch
if len(got.Files) != 1 {
t.Errorf("ParsePath(%q) did not receive 1 file:\n%s", testcase.name, string(gotMarshaled))
return
}
file := got.Files[0]
if file.Addition != testcase.addition {
t.Errorf("ParsePath(%q) does not have correct file addition %d, wanted %d", testcase.name, file.Addition, testcase.addition)
}
if file.Deletion != testcase.deletion {
t.Errorf("ParsePath(%q) did not have correct file deletion %d, wanted %d", testcase.name, file.Deletion, testcase.deletion)
}
if file.OldName != testcase.oldFilename {
t.Errorf("ParsePath(%q) did not have correct OldName %q, wanted %q", testcase.name, file.OldName, testcase.oldFilename)
}
if file.Name != testcase.filename {
t.Errorf("ParsePath(%q) did not have correct Name %q, wanted %q", testcase.name, file.Name, testcase.filename)
}
})
}
// Test max lines
diffBuilder := &strings.Builder{}
diff := `diff --git a/newfile2 b/newfile2
new file mode 100644
index 0000000..6bb8f39
--- /dev/null
+++ b/newfile2
@@ -0,0 +1,35 @@
`
diffBuilder.WriteString(diff)
// append 35 added lines: "+line0" .. "+line34"
for i := range 35 {
diffBuilder.WriteString("+line" + strconv.Itoa(i) + "\n")
}
diff = diffBuilder.String()
// a limit of 20 lines is below the 35 added lines: the file must be flagged incomplete
result, err := ParsePatch(db.DefaultContext, 20, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "")
if err != nil {
t.Errorf("There should not be an error: %v", err)
}
if !result.Files[0].IsIncomplete {
t.Errorf("Files should be incomplete! %v", result.Files[0])
}
// a limit of 40 lines is enough for the whole file
result, err = ParsePatch(db.DefaultContext, 40, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "")
if err != nil {
t.Errorf("There should not be an error: %v", err)
}
if result.Files[0].IsIncomplete {
t.Errorf("Files should not be incomplete! %v", result.Files[0])
}
// enough lines, but a limit of 5 characters per line is expected to flag the file incomplete
result, err = ParsePatch(db.DefaultContext, 40, 5, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "")
if err != nil {
t.Errorf("There should not be an error: %v", err)
}
if !result.Files[0].IsIncomplete {
t.Errorf("Files should be incomplete! %v", result.Files[0])
}
// Test max characters
diff = `diff --git a/newfile2 b/newfile2
new file mode 100644
index 0000000..6bb8f39
--- /dev/null
+++ b/newfile2
@@ -0,0 +1,35 @@
`
diffBuilder.Reset()
diffBuilder.WriteString(diff)
for i := range 33 {
diffBuilder.WriteString("+line" + strconv.Itoa(i) + "\n")
}
// "+line33" is followed by 512 * 16 = 8192 characters, well above the 4096 limit used below
diffBuilder.WriteString("+line33")
for range 512 {
diffBuilder.WriteString("0123456789ABCDEF")
}
diffBuilder.WriteByte('\n')
diffBuilder.WriteString("+line" + strconv.Itoa(34) + "\n")
diffBuilder.WriteString("+line" + strconv.Itoa(35) + "\n")
diff = diffBuilder.String()
// with the over-long line the file is expected to be incomplete for both line limits (20 and 40)
result, err = ParsePatch(db.DefaultContext, 20, 4096, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "")
if err != nil {
t.Errorf("There should not be an error: %v", err)
}
if !result.Files[0].IsIncomplete {
t.Errorf("Files should be incomplete! %v", result.Files[0])
}
result, err = ParsePatch(db.DefaultContext, 40, 4096, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "")
if err != nil {
t.Errorf("There should not be an error: %v", err)
}
if !result.Files[0].IsIncomplete {
t.Errorf("Files should be incomplete! %v", result.Files[0])
}
// The remaining patches are only required to parse without an error.
// quoted names on the "diff --git" line, unquoted names on the ---/+++ lines
diff = `diff --git "a/README.md" "b/README.md"
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off`
_, err = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "")
if err != nil {
t.Errorf("ParsePatch failed: %s", err)
}
// quoted names containing an escaped backslash on every header line
diff2 := `diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
+++ "b/A \\ B"
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off`
_, err = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff2), "")
if err != nil {
t.Errorf("ParsePatch failed: %s", err)
}
// quoted old name combined with an unquoted new name
diff2a := `diff --git "a/A \\ B" b/A/B
--- "a/A \\ B"
+++ b/A/B
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off`
_, err = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff2a), "")
if err != nil {
t.Errorf("ParsePatch failed: %s", err)
}
// completely unquoted header
diff3 := `diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
# gitea-github-migrator
+
+ Build Status
- Latest Release
Docker Pulls
+ cut off
+ cut off`
_, err = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff3), "")
if err != nil {
t.Errorf("ParsePatch failed: %s", err)
}
}
// setupDefaultDiff builds the minimal Diff fixture shared by the comment-loading
// tests: one file named "README.md" with a single section that holds a single
// line whose left and right indexes are both 4.
func setupDefaultDiff() *Diff {
	onlyLine := &DiffLine{LeftIdx: 4, RightIdx: 4}
	onlySection := &DiffSection{Lines: []*DiffLine{onlyLine}}
	readme := &DiffFile{
		Name:     "README.md",
		Sections: []*DiffSection{onlySection},
	}
	return &Diff{Files: []*DiffFile{readme}}
}
// TestDiff_LoadCommentsNoOutdated loads the comments of issue 2 as user 1 into
// the default fixture, passing false as the last LoadComments argument, and
// expects two comments on the fixture's only line.
func TestDiff_LoadCommentsNoOutdated(t *testing.T) {
	assert.NoError(t, unittest.PrepareTestDatabase())

	var (
		issue = unittest.AssertExistsAndLoadBean(t, &issues_model.Issue{ID: 2})
		doer  = unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 1})
		d     = setupDefaultDiff()
	)

	err := d.LoadComments(db.DefaultContext, issue, doer, false)
	assert.NoError(t, err)

	onlyLine := d.Files[0].Sections[0].Lines[0]
	assert.Len(t, onlyLine.Comments, 2)
}
// TestDiff_LoadCommentsWithOutdated loads the comments of issue 2 as user 1 into
// the default fixture, passing true as the last LoadComments argument, and
// expects three comments on the fixture's only line.
func TestDiff_LoadCommentsWithOutdated(t *testing.T) {
	assert.NoError(t, unittest.PrepareTestDatabase())

	var (
		issue = unittest.AssertExistsAndLoadBean(t, &issues_model.Issue{ID: 2})
		doer  = unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 1})
		d     = setupDefaultDiff()
	)

	err := d.LoadComments(db.DefaultContext, issue, doer, true)
	assert.NoError(t, err)

	onlyLine := d.Files[0].Sections[0].Lines[0]
	assert.Len(t, onlyLine.Comments, 3)
}
func TestDiffLine_CanComment(t *testing.T) {
assert.False(t, (&DiffLine{Type: DiffLineSection}).CanComment())
assert.False(t, (&DiffLine{Type: DiffLineAdd, Comments: []*issues_model.Comment{{Content: "bla"}}}).CanComment())
assert.True(t, (&DiffLine{Type: DiffLineAdd}).CanComment())
assert.True(t, (&DiffLine{Type: DiffLineDel}).CanComment())
assert.True(t, (&DiffLine{Type: DiffLinePlain}).CanComment())
}
func TestDiffLine_GetCommentSide(t *testing.T) {
assert.Equal(t, "previous", (&DiffLine{Comments: []*issues_model.Comment{{Line: -3}}}).GetCommentSide())
assert.Equal(t, "proposed", (&DiffLine{Comments: []*issues_model.Comment{{Line: 3}}}).GetCommentSide())
}
// TestGetDiffRangeWithWhitespaceBehavior requests the same commit range from
// GetDiffForAPI once per whitespace option (and once without any), limited to a
// single file. Every run must succeed, be reported as incomplete, and return
// exactly one file that has at least one section.
func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
	gitRepo, err := git.OpenRepository(t.Context(), "../../modules/git/tests/repos/repo5_pulls")
	require.NoError(t, err)
	defer gitRepo.Close()

	behaviors := []git.TrustedCmdArgs{
		{"-w"},
		{"--ignore-space-at-eol"},
		{"-b"},
		nil, // no whitespace option at all
	}
	for _, behavior := range behaviors {
		opts := &DiffOptions{
			AfterCommitID:      "d8e0bbb45f200e67d9a784ce55bd90821af45ebd",
			BeforeCommitID:     "72866af952e98d02a73003501836074b286a78f6",
			MaxLines:           setting.Git.MaxGitDiffLines,
			MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters,
			MaxFiles:           1,
			WhitespaceBehavior: behavior,
		}
		diffs, err := GetDiffForAPI(t.Context(), gitRepo, opts)
		require.NoError(t, err, "Error when diff with WhitespaceBehavior=%s", behavior)

		assert.True(t, diffs.IsIncomplete)
		assert.Len(t, diffs.Files, 1)
		for _, file := range diffs.Files {
			assert.NotEmpty(t, file.Sections, "Diff file %q should have sections", file.Name)
		}
	}
}
// TestNoCrashes feeds malformed patch headers to ParsePatch.
// The test passes as long as parsing does not panic.
func TestNoCrashes(t *testing.T) {
	malformed := []string{
		"diff --git \n--- a\t\n",
		"diff --git \"0\n",
	}
	for _, patch := range malformed {
		// It shouldn't crash, so the parsed diff and the error are deliberately discarded.
		_, _ = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(patch), "")
	}
}

View File

@@ -0,0 +1,265 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"bytes"
"html/template"
"strings"
"github.com/sergi/go-diff/diffmatchpatch"
)
// extractHTMLToken splits s around the first HTML token it contains.
// A token is a html tag or entity, eg: "<span ...>", "</span>", "&lt;"
//
// It returns the text before the token, the token itself and the text after it.
// When s contains no token, before and token are empty, after is s and valid is true.
// When a token starts ('<' or '&') but is never terminated ('>' or ';'),
// before and token are empty, after is s and valid is false.
func extractHTMLToken(s string) (before, token, after string, valid bool) {
	start := strings.IndexAny(s, "<&")
	if start == -1 {
		return "", "", s, true
	}

	// a tag ends at the next '>', an entity at the next ';'
	terminator := byte('>')
	if s[start] == '&' {
		terminator = ';'
	}

	length := strings.IndexByte(s[start:], terminator)
	if length == -1 {
		return "", "", s, false
	}
	end := start + length + 1
	return s[:start], s[start:end], s[end:], true
}
// highlightCodeDiff is used to do diff with highlighted HTML code.
// It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes.
// The HTML tags and entities will be replaced by Unicode placeholders taken from a private use area,
// with the range set up by newHighlightCodeDiff: "<span>{TEXT}</span>" => "\U00100000{TEXT}\U00100001"
// These Unicode placeholders are friendly to the diff.
// Then after diff, the placeholders in diff result will be recovered to the HTML tags and entities.
// It's guaranteed that the tags in final diff result are paired correctly.
type highlightCodeDiff struct {
// placeholderBegin is the first rune of the range placeholders are taken from
placeholderBegin rune
// placeholderMaxCount is the size of the placeholder range
placeholderMaxCount int
// placeholderIndex is the offset (from placeholderBegin) of the next candidate placeholder
placeholderIndex int
// placeholderTokenMap maps a placeholder to its token; runes of the range that occur in the
// code itself are stored with an empty token so that they are never handed out as placeholders
placeholderTokenMap map[rune]string
// tokenPlaceholderMap is the reverse lookup: token => placeholder
tokenPlaceholderMap map[string]rune
// placeholderOverflowCount counts the tokens which could not get a placeholder
placeholderOverflowCount int
// lineWrapperTags holds the line wrapper tags collected by the first conversion
lineWrapperTags []string
}
// newHighlightCodeDiff returns a highlightCodeDiff with empty token maps whose
// placeholders are taken from the 64000 runes starting at U+100000.
func newHighlightCodeDiff() *highlightCodeDiff {
	hcd := new(highlightCodeDiff)
	// Plane 16: Supplementary Private Use Area B (U+100000..U+10FFFD)
	hcd.placeholderBegin = rune(0x100000)
	hcd.placeholderMaxCount = 64000
	hcd.placeholderTokenMap = make(map[rune]string)
	hcd.tokenPlaceholderMap = make(map[string]rune)
	return hcd
}
// nextPlaceholder hands out the next unused rune of the placeholder range and
// advances placeholderIndex past it. It returns 0 if no more placeholder can be used.
// The diff is done line by line, usually there are only a few (no more than 10)
// placeholders in one line, so placeholderMaxCount is not expected to be
// exhausted in real cases.
func (hcd *highlightCodeDiff) nextPlaceholder() rune {
	for hcd.placeholderIndex < hcd.placeholderMaxCount {
		candidate := hcd.placeholderBegin + rune(hcd.placeholderIndex)
		hcd.placeholderIndex++

		// a rune already present in the map (as a placeholder, or because the
		// code itself uses it) must not be handed out again
		_, taken := hcd.placeholderTokenMap[candidate]
		if taken {
			continue
		}
		return candidate
	}
	return 0 // the range is exhausted
}
// isInPlaceholderRange reports whether r lies in the half-open rune range
// [placeholderBegin, placeholderBegin+placeholderMaxCount).
func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool {
	first := hcd.placeholderBegin
	if r < first {
		return false
	}
	limit := first + rune(hcd.placeholderMaxCount)
	return r < limit
}
// collectUsedRunes scans code for runes that fall into the placeholder range and
// records each of them in placeholderTokenMap with an empty token, so that a
// rune already used by the code won't be used as a placeholder anymore.
func (hcd *highlightCodeDiff) collectUsedRunes(code template.HTML) {
	for _, r := range code {
		if !hcd.isInPlaceholderRange(r) {
			continue
		}
		hcd.placeholderTokenMap[r] = ""
	}
}
// diffLineWithHighlight diffs the highlighted HTML codeA against codeB for a line of the given type.
// It is diffLineWithHighlightWrapper called without line wrapper tags, so nothing is wrapped around the result.
func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA, codeB template.HTML) template.HTML {
return hcd.diffLineWithHighlightWrapper(nil, lineType, codeA, codeB)
}
// diffLineWithHighlightWrapper diffs the highlighted HTML codeA against codeB and
// renders the result for one side of the diff.
//
// Both inputs are converted to placeholder strings and diffed. Equal parts are
// always kept. Inserted parts are kept only for DiffLineAdd and deleted parts only
// for DiffLineDel; those parts are wrapped in `<span class="added-code">` or
// `<span class="removed-code">`, unless no placeholder is left for the wrapper
// tags, in which case they are written unwrapped. The given lineWrapperTags are
// written in front of the result and one "</span>" per tag is appended, before
// all placeholders are recovered to HTML.
func (hcd *highlightCodeDiff) diffLineWithHighlightWrapper(lineWrapperTags []string, lineType DiffLineType, codeA, codeB template.HTML) template.HTML {
	hcd.collectUsedRunes(codeA)
	hcd.collectUsedRunes(codeB)

	textA := hcd.convertToPlaceholders(codeA)
	textB := hcd.convertToPlaceholders(codeB)

	differ := defaultDiffMatchPatch()
	chunks := differ.DiffCleanupSemantic(differ.DiffMain(textA, textB, true))

	var out bytes.Buffer

	// restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
	for _, openTag := range lineWrapperTags {
		out.WriteString(openTag)
	}

	addedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="added-code">`)
	removedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="removed-code">`)
	codeTagSuffix := hcd.registerTokenAsPlaceholder(`</span>`)
	// without a placeholder for the closing tag the placeholder map space is exhausted
	canWrap := codeTagSuffix != 0

	for _, chunk := range chunks {
		var prefix rune
		switch {
		case chunk.Type == diffmatchpatch.DiffEqual:
			out.WriteString(chunk.Text)
			continue
		case chunk.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
			prefix = addedCodePrefix
		case chunk.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
			prefix = removedCodePrefix
		default:
			// the change belongs to the other side of the diff
			continue
		}
		if !canWrap {
			out.WriteString(chunk.Text)
			continue
		}
		out.WriteRune(prefix)
		out.WriteString(chunk.Text)
		out.WriteRune(codeTagSuffix)
	}

	for range lineWrapperTags {
		out.WriteString("</span>")
	}
	return hcd.recoverOneDiff(out.String())
}
// registerTokenAsPlaceholder returns the placeholder of token, allocating a new
// one and recording it in both lookup maps when the token is seen for the first time.
// It returns 0, and records nothing, when no placeholder is left.
func (hcd *highlightCodeDiff) registerTokenAsPlaceholder(token string) rune {
	if known, found := hcd.tokenPlaceholderMap[token]; found {
		return known
	}

	fresh := hcd.nextPlaceholder()
	if fresh == 0 {
		return 0
	}
	hcd.tokenPlaceholderMap[token] = fresh
	hcd.placeholderTokenMap[fresh] = token
	return fresh
}
// convertToPlaceholders replaces every HTML tag and entity of htmlContent with a
// single placeholder rune, so that the diff algorithm handles each token as one unit.
// It totally depends on Chroma's valid HTML output and its structure, do not use
// these functions for other purposes.
//
// The line wrapper tags (`<span class="line ...">` and `<span class="cl">`) are
// removed together with one trailing "</span>" each; during the first conversion
// (while hcd.lineWrapperTags is still nil) they are saved in hcd.lineWrapperTags.
// A closing tag is registered under a key which also contains its opening tag, so
// the closing tags of different opening tags get different placeholders.
//
// Conversion stops at the first unterminated token, and at a closing tag which
// has no opening tag (that tag is dropped); the rest of the input is then
// appended unconverted. Tokens which can not get a placeholder anymore are
// counted in hcd.placeholderOverflowCount.
func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) string {
	var tagStack []string
	res := strings.Builder{}

	firstRunForLineTags := hcd.lineWrapperTags == nil

	var beforeToken, token string
	var valid bool

	htmlCode := string(htmlContent)
	// the standard chroma highlight HTML is "<span class="line [hl]"><span class="cl"> ... </span></span>"
	for {
		beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode)
		if !valid || token == "" {
			break
		}
		// write the content before the token into result string, and consume the token in the string
		res.WriteString(beforeToken)

		// the line wrapper tags should be removed before diff
		if strings.HasPrefix(token, `<span class="line`) || strings.HasPrefix(token, `<span class="cl"`) {
			if firstRunForLineTags {
				// if this is the first run for converting, save the line wrapper tags for later use, they should be added back
				hcd.lineWrapperTags = append(hcd.lineWrapperTags, token)
			}
			htmlCode = strings.TrimSuffix(htmlCode, "</span>")
			continue
		}

		var tokenInMap string
		// A token always ends with '>' or ';', so a closing tag can only be recognized by its "</" prefix.
		if strings.HasPrefix(token, "</") { // for closing tag
			if len(tagStack) == 0 {
				break // invalid diff result, no opening tag but see closing tag
			}
			// make sure the closing tag in map is related to the open tag, to make the diff algorithm can match the opening/closing tags
			// the closing tag will be recorded in the map by key "</span><!-- <span the-opening> -->" for "<span the-opening>"
			tokenInMap = token + "<!-- " + tagStack[len(tagStack)-1] + "-->"
			tagStack = tagStack[:len(tagStack)-1]
		} else if token[0] == '<' { // for opening tag
			tokenInMap = token
			tagStack = append(tagStack, token)
		} else if token[0] == '&' { // for html entity
			tokenInMap = token
		} // else: impossible

		// remember the placeholder and token in the map
		placeholder := hcd.registerTokenAsPlaceholder(tokenInMap)

		if placeholder != 0 {
			res.WriteRune(placeholder) // use the placeholder to replace the token
		} else {
			// unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting
			// usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma.
			hcd.placeholderOverflowCount++
			if strings.HasPrefix(token, "&") {
				// when the token is a html entity, something must be outputted even if there is no placeholder.
				res.WriteRune(0xFFFD)      // replacement character TODO: how to handle this case more gracefully?
				res.WriteString(token[1:]) // still output the entity code part, otherwise there will be no diff result.
			}
		}
	}

	// write the remaining string
	res.WriteString(htmlCode)
	return res.String()
}
// recoverOneDiff turns the placeholders of str back into their HTML tags and entities.
// Runes which are not placeholders (or are recorded with an empty token) are copied as they are.
// A closing tag without a preceding opening tag is skipped, and every tag which
// is still open at the end of str gets closed, so the tags of the result are paired.
func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
	var out strings.Builder
	var openTags []string

	for _, r := range str {
		token, known := hcd.placeholderTokenMap[r]
		if !known || token == "" {
			out.WriteRune(r)
			continue
		}

		switch {
		case strings.HasPrefix(token, "</"): // closing tag
			if len(openTags) == 0 {
				continue // nothing is open yet, drop the closing tag
			}
			openTags = openTags[:len(openTags)-1]
			// only the tag itself is written, the trailing comment (added when the closing tag
			// was registered, to tie it to its opening tag) is cut off
			out.WriteString(token[:strings.IndexByte(token, '>')+1])
		case token[0] == '<': // opening tag
			openTags = append(openTags, token)
			out.WriteString(token)
		case token[0] == '&': // html entity
			out.WriteString(token)
		}
	}

	// close the remaining opening tags, the innermost one first
	for i := len(openTags) - 1; i >= 0; i-- {
		unclosed := openTags[i]
		// the tag name ends at the first space or '>': "<span class=...>" or "<span>" => "</span>"
		nameEnd := strings.IndexAny(unclosed, " >")
		if nameEnd == -1 {
			continue // not expected, only valid opening tags are pushed above
		}
		out.WriteString("</" + unclosed[1:nameEnd] + ">")
	}
	return template.HTML(out.String())
}

View File

@@ -0,0 +1,86 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"fmt"
"html/template"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// TestDiffWithHighlight checks the inline diff markup produced by
// diffLineWithHighlight for highlighted code lines, and the tag balancing
// performed by recoverOneDiff.
func TestDiffWithHighlight(t *testing.T) {
	// Every case diffs the same pair of lines twice with one shared
	// highlightCodeDiff: first rendering the deleted side, then the added side.
	//
	// NOTE(review): the class attribute in the "CleanUp" fixtures has no closing
	// quote; input and expectations agree, so it is kept as-is — confirm whether
	// this is intentional.
	lineCases := []struct {
		name             string
		oldCode, newCode template.HTML
		wantDel, wantAdd string
	}{
		{
			name:    "DiffLineAddDel",
			oldCode: `x <span class="k">foo</span> y`,
			newCode: `x <span class="k">bar</span> y`,
			wantDel: `x <span class="k"><span class="removed-code">foo</span></span> y`,
			wantAdd: `x <span class="k"><span class="added-code">bar</span></span> y`,
		},
		{
			name:    "CleanUp",
			oldCode: `<span class="cm>this is a comment</span>`,
			newCode: `<span class="cm>this is updated comment</span>`,
			wantDel: `<span class="cm>this is <span class="removed-code">a</span> comment</span>`,
			wantAdd: `<span class="cm>this is <span class="added-code">updated</span> comment</span>`,
		},
	}
	for _, tc := range lineCases {
		t.Run(tc.name, func(t *testing.T) {
			differ := newHighlightCodeDiff()

			gotDel := differ.diffLineWithHighlight(DiffLineDel, tc.oldCode, tc.newCode)
			assert.Equal(t, tc.wantDel, string(gotDel))

			gotAdd := differ.diffLineWithHighlight(DiffLineAdd, tc.oldCode, tc.newCode)
			assert.Equal(t, tc.wantAdd, string(gotAdd))
		})
	}

	t.Run("OpenCloseTags", func(t *testing.T) {
		differ := newHighlightCodeDiff()
		differ.placeholderTokenMap['O'] = "<span>"
		differ.placeholderTokenMap['C'] = "</span>"

		// a matched pair is emitted unchanged
		assert.Equal(t, "<span></span>", string(differ.recoverOneDiff("OC")))
		// an unclosed opening tag gets closed
		assert.Equal(t, "<span></span>", string(differ.recoverOneDiff("O")))
		// a closing tag without an opening tag is dropped
		assert.Empty(t, string(differ.recoverOneDiff("C")))
	})
}
// TestDiffWithHighlightPlaceholder diffs lines that themselves contain the
// runes U+100000 and U+10FFFD. It expects those runes to come back unchanged
// in the output and to have no token registered for them in
// placeholderTokenMap.
func TestDiffWithHighlightPlaceholder(t *testing.T) {
	wantDel := fmt.Sprintf(`a='<span class="removed-code">%s</span>'`, "\U00100000")
	wantAdd := fmt.Sprintf(`a='<span class="added-code">%s</span>'`, "\U0010FFFD")

	// deleted side
	differ := newHighlightCodeDiff()
	got := differ.diffLineWithHighlight(DiffLineDel, "a='\U00100000'", "a='\U0010FFFD''")
	assert.Empty(t, differ.placeholderTokenMap[0x00100000])
	assert.Empty(t, differ.placeholderTokenMap[0x0010FFFD])
	assert.Equal(t, wantDel, string(got))

	// added side, with a fresh differ
	differ = newHighlightCodeDiff()
	got = differ.diffLineWithHighlight(DiffLineAdd, "a='\U00100000'", "a='\U0010FFFD'")
	assert.Equal(t, wantAdd, string(got))
}
// TestDiffWithHighlightPlaceholderExhausted sets placeholderMaxCount to zero
// and checks the output for a line consisting of one highlighted HTML entity:
// the expected result is U+FFFD followed by the entity code without its "&",
// with no span tags.
func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) {
	const (
		oldLine = `<span class="k">&lt;</span>`
		newLine = `<span class="k">&gt;</span>`
	)
	replacement := string(rune(0xFFFD))

	differ := newHighlightCodeDiff()
	differ.placeholderMaxCount = 0

	got := differ.diffLineWithHighlight(DiffLineDel, oldLine, newLine)
	assert.Equal(t, replacement+"lt;", string(got))

	got = differ.diffLineWithHighlight(DiffLineAdd, oldLine, newLine)
	assert.Equal(t, replacement+"gt;", string(got))
}
// TestDiffWithHighlightTagMatch raises placeholderMaxCount step by step from
// zero until no placeholder overflow is reported, and checks at every step
// that the output has as many "<span" as "</span" occurrences. It also
// requires that at least one step actually overflowed.
func TestDiffWithHighlightTagMatch(t *testing.T) {
	checkBalanced := func(t *testing.T, lineType DiffLineType) {
		overflowSeen := 0
		for limit := 0; ; limit++ {
			differ := newHighlightCodeDiff()
			differ.placeholderMaxCount = limit

			rendered := string(differ.diffLineWithHighlight(lineType, `<span class="k">&lt;</span>`, `<span class="k">&gt;</span>`))
			overflowSeen += differ.placeholderOverflowCount

			opened := strings.Count(rendered, "<span")
			closed := strings.Count(rendered, "</span")
			assert.Equal(t, opened, closed)

			if differ.placeholderOverflowCount == 0 {
				// enough placeholders were available, larger limits add nothing
				break
			}
		}
		assert.NotZero(t, overflowSeen)
	}

	for _, tc := range []struct {
		name     string
		lineType DiffLineType
	}{
		{"DiffLineAdd", DiffLineAdd},
		{"DiffLineDel", DiffLineDel},
	} {
		t.Run(tc.name, func(t *testing.T) { checkBalanced(t, tc.lineType) })
	}
}

View File

@@ -0,0 +1,18 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"testing"
"code.gitea.io/gitea/models/unittest"
_ "code.gitea.io/gitea/models"
_ "code.gitea.io/gitea/models/actions"
_ "code.gitea.io/gitea/models/activities"
)
// TestMain is the entry point for this package's tests; it delegates the
// whole run to unittest.MainTest.
func TestMain(m *testing.M) {
unittest.MainTest(m)
}

View File

@@ -0,0 +1,65 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"context"
"html/template"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/log"
)
// SubmoduleDiffInfo describes a submodule entry that appears in a diff: its
// name, the commit IDs it pointed to before and after the change, and the
// submodule file used to build web links for it.
type SubmoduleDiffInfo struct {
// SubmoduleName is set by PopulateURL from the diff file's name.
SubmoduleName string
SubmoduleFile *git.CommitSubmoduleFile // it might be nil if the submodule is not found or unable to parse
// NewRefID is the commit ID from the added "Subproject commit" line of the
// diff; it stays empty when the diff only removes the submodule.
NewRefID string
// PreviousRefID is the commit ID from the removed "Subproject commit" line
// of the diff; it stays empty when the diff only adds the submodule.
PreviousRefID string
}
// PopulateURL records the submodule's name and, when the submodule can be
// looked up in the relevant commit, its SubmoduleFile.
//
// The lookup uses leftCommit for a deleted file and rightCommit otherwise.
// If that commit is nil, or the submodule is not found, SubmoduleFile is left
// untouched. A lookup error is only logged.
func (si *SubmoduleDiffInfo) PopulateURL(diffFile *DiffFile, leftCommit, rightCommit *git.Commit) {
	si.SubmoduleName = diffFile.Name

	// An added or updated submodule exists at the right commit, a deleted one
	// only at the left commit.
	var commit *git.Commit
	if diffFile.IsDeleted {
		commit = leftCommit
	} else {
		commit = rightCommit
	}
	if commit == nil {
		return
	}

	fileName := diffFile.GetDiffFileName()
	submodule, err := commit.GetSubModule(fileName)
	switch {
	case err != nil:
		// ignore the error, do not cause 500 errors for end users
		log.Error("Unable to PopulateURL for submodule %q: GetSubModule: %v", fileName, err)
	case submodule != nil:
		si.SubmoduleFile = git.NewCommitSubmoduleFile(submodule.URL, commit.ID.String())
	}
}
// CommitRefIDLinkHTML renders the short form of commitID. When SubmoduleWebLink
// yields a link for the submodule, the short ID is wrapped in an anchor
// pointing to that commit; otherwise it is returned as plain text (this is
// what TestSubmoduleInfo expects when SubmoduleFile is unset).
func (si *SubmoduleDiffInfo) CommitRefIDLinkHTML(ctx context.Context, commitID string) template.HTML {
	link := si.SubmoduleFile.SubmoduleWebLink(ctx, commitID)
	shortID := base.ShortSha(commitID)
	if link != nil {
		return htmlutil.HTMLFormat(`<a href="%s">%s</a>`, link.CommitWebLink, shortID)
	}
	return htmlutil.HTMLFormat("%s", shortID)
}
// CompareRefIDLinkHTML renders "<previous>...<new>" using the short forms of
// PreviousRefID and NewRefID. When SubmoduleWebLink yields a link for that
// pair of commits, the text is wrapped in an anchor pointing to it; otherwise
// it is returned as plain text.
func (si *SubmoduleDiffInfo) CompareRefIDLinkHTML(ctx context.Context) template.HTML {
	link := si.SubmoduleFile.SubmoduleWebLink(ctx, si.PreviousRefID, si.NewRefID)
	prevShort := base.ShortSha(si.PreviousRefID)
	newShort := base.ShortSha(si.NewRefID)
	if link != nil {
		return htmlutil.HTMLFormat(`<a href="%s">%s...%s</a>`, link.CommitWebLink, prevShort, newShort)
	}
	return htmlutil.HTMLFormat("%s...%s", prevShort, newShort)
}
// SubmoduleRepoLinkHTML renders the submodule's name. When SubmoduleWebLink
// yields a link, the name is wrapped in an anchor pointing to the submodule's
// repository; otherwise the name is returned as plain text.
func (si *SubmoduleDiffInfo) SubmoduleRepoLinkHTML(ctx context.Context) template.HTML {
	if link := si.SubmoduleFile.SubmoduleWebLink(ctx); link != nil {
		return htmlutil.HTMLFormat(`<a href="%s">%s</a>`, link.RepoWebLink, si.SubmoduleName)
	}
	return htmlutil.HTMLFormat("%s", si.SubmoduleName)
}

View File

@@ -0,0 +1,234 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitdiff
import (
"strings"
"testing"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"github.com/stretchr/testify/assert"
)
// TestParseSubmoduleInfo feeds git patches that add, update, rename, delete,
// move or convert a submodule to ParsePatch and compares the SubmoduleDiffInfo
// of selected parsed files with the expected ref IDs.
func TestParseSubmoduleInfo(t *testing.T) {
type testcase struct {
name string
gitdiff string
// infos maps an index into the parsed diff's Files to the
// SubmoduleDiffInfo expected for that file.
infos map[int]SubmoduleDiffInfo
}
tests := []testcase{
{
name: "added",
gitdiff: `diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..4ac13c1
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "gitea-mirror"]
+ path = gitea-mirror
+ url = https://gitea.com/gitea/gitea-mirror
diff --git a/gitea-mirror b/gitea-mirror
new file mode 160000
index 0000000..68972a9
--- /dev/null
+++ b/gitea-mirror
@@ -0,0 +1 @@
+Subproject commit 68972a994719ae5c74e28d8fa82fa27c23399bc8
`,
infos: map[int]SubmoduleDiffInfo{
1: {NewRefID: "68972a994719ae5c74e28d8fa82fa27c23399bc8"},
},
},
{
name: "updated",
gitdiff: `diff --git a/gitea-mirror b/gitea-mirror
index 68972a9..c8ffe77 160000
--- a/gitea-mirror
+++ b/gitea-mirror
@@ -1 +1 @@
-Subproject commit 68972a994719ae5c74e28d8fa82fa27c23399bc8
+Subproject commit c8ffe777cf9c5bb47a38e3e0b3a3b5de6cd8813d
`,
infos: map[int]SubmoduleDiffInfo{
0: {
PreviousRefID: "68972a994719ae5c74e28d8fa82fa27c23399bc8",
NewRefID: "c8ffe777cf9c5bb47a38e3e0b3a3b5de6cd8813d",
},
},
},
{
name: "rename",
gitdiff: `diff --git a/.gitmodules b/.gitmodules
index 4ac13c1..0510edd 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
[submodule "gitea-mirror"]
- path = gitea-mirror
+ path = gitea
url = https://gitea.com/gitea/gitea-mirror
diff --git a/gitea-mirror b/gitea
similarity index 100%
rename from gitea-mirror
rename to gitea
`,
// no infos: for this case only the absence of a parse error is asserted
},
{
name: "deleted",
gitdiff: `diff --git a/.gitmodules b/.gitmodules
index 0510edd..e69de29 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "gitea-mirror"]
- path = gitea
- url = https://gitea.com/gitea/gitea-mirror
diff --git a/gitea b/gitea
deleted file mode 160000
index c8ffe77..0000000
--- a/gitea
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c8ffe777cf9c5bb47a38e3e0b3a3b5de6cd8813d
`,
infos: map[int]SubmoduleDiffInfo{
1: {
PreviousRefID: "c8ffe777cf9c5bb47a38e3e0b3a3b5de6cd8813d",
},
},
},
{
name: "moved and updated",
gitdiff: `diff --git a/.gitmodules b/.gitmodules
index 0510edd..bced3d8 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
[submodule "gitea-mirror"]
- path = gitea
+ path = gitea-1.22
url = https://gitea.com/gitea/gitea-mirror
diff --git a/gitea b/gitea
deleted file mode 160000
index c8ffe77..0000000
--- a/gitea
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c8ffe777cf9c5bb47a38e3e0b3a3b5de6cd8813d
diff --git a/gitea-1.22 b/gitea-1.22
new file mode 160000
index 0000000..8eefa1f
--- /dev/null
+++ b/gitea-1.22
@@ -0,0 +1 @@
+Subproject commit 8eefa1f6dedf2488db2c9e12c916e8e51f673160
`,
infos: map[int]SubmoduleDiffInfo{
1: {
PreviousRefID: "c8ffe777cf9c5bb47a38e3e0b3a3b5de6cd8813d",
},
2: {
NewRefID: "8eefa1f6dedf2488db2c9e12c916e8e51f673160",
},
},
},
{
name: "converted to file",
gitdiff: `diff --git a/.gitmodules b/.gitmodules
index 0510edd..e69de29 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "gitea-mirror"]
- path = gitea
- url = https://gitea.com/gitea/gitea-mirror
diff --git a/gitea b/gitea
deleted file mode 160000
index c8ffe77..0000000
--- a/gitea
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c8ffe777cf9c5bb47a38e3e0b3a3b5de6cd8813d
diff --git a/gitea b/gitea
new file mode 100644
index 0000000..33a9488
--- /dev/null
+++ b/gitea
@@ -0,0 +1 @@
+example
`,
infos: map[int]SubmoduleDiffInfo{
1: {
PreviousRefID: "c8ffe777cf9c5bb47a38e3e0b3a3b5de6cd8813d",
},
},
},
{
name: "converted to submodule",
gitdiff: `diff --git a/.gitmodules b/.gitmodules
index e69de29..14ee267 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "gitea"]
+ path = gitea
+ url = https://gitea.com/gitea/gitea-mirror
diff --git a/gitea b/gitea
deleted file mode 100644
index 33a9488..0000000
--- a/gitea
+++ /dev/null
@@ -1 +0,0 @@
-example
diff --git a/gitea b/gitea
new file mode 160000
index 0000000..68972a9
--- /dev/null
+++ b/gitea
@@ -0,0 +1 @@
+Subproject commit 68972a994719ae5c74e28d8fa82fa27c23399bc8
`,
infos: map[int]SubmoduleDiffInfo{
2: {
NewRefID: "68972a994719ae5c74e28d8fa82fa27c23399bc8",
},
},
},
}
for _, testcase := range tests {
t.Run(testcase.name, func(t *testing.T) {
// parse the patch with the limits configured in setting.Git
diff, err := ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), "")
assert.NoError(t, err)
// only the files listed in infos are checked; i is the file's position in diff.Files
for i, expected := range testcase.infos {
actual := diff.Files[i]
assert.NotNil(t, actual)
assert.Equal(t, expected, *actual.SubmoduleDiffInfo)
}
})
}
}
// TestSubmoduleInfo checks the three HTML helpers of SubmoduleDiffInfo twice:
// first without a SubmoduleFile, where plain text is expected, and then with
// one, where each value is expected to be wrapped in a link.
func TestSubmoduleInfo(t *testing.T) {
	ctx := t.Context()
	info := &SubmoduleDiffInfo{
		SubmoduleName: "name",
		PreviousRefID: "aaaa",
		NewRefID:      "bbbb",
	}

	// expectLinks asserts the current output of all three helpers, in the
	// order commit link, compare link, repository link.
	expectLinks := func(wantCommit, wantCompare, wantRepo string) {
		assert.EqualValues(t, wantCommit, info.CommitRefIDLinkHTML(ctx, "1111"))
		assert.EqualValues(t, wantCompare, info.CompareRefIDLinkHTML(ctx))
		assert.EqualValues(t, wantRepo, info.SubmoduleRepoLinkHTML(ctx))
	}

	// without a submodule file there is nothing to link to
	expectLinks("1111", "aaaa...bbbb", "name")

	info.SubmoduleFile = git.NewCommitSubmoduleFile("https://github.com/owner/repo", "1234")
	expectLinks(
		`<a href="https://github.com/owner/repo/tree/1111">1111</a>`,
		`<a href="https://github.com/owner/repo/compare/aaaa...bbbb">aaaa...bbbb</a>`,
		`<a href="https://github.com/owner/repo">name</a>`,
	)
}