first-commit
This commit is contained in:
225
modules/highlight/highlight.go
Normal file
225
modules/highlight/highlight.go
Normal file
@@ -0,0 +1,225 @@
|
||||
// Copyright 2015 The Gogs Authors. All rights reserved.
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package highlight
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
gohtml "html"
|
||||
"html/template"
|
||||
"io"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"code.gitea.io/gitea/modules/analyze"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
"github.com/alecthomas/chroma/v2"
|
||||
"github.com/alecthomas/chroma/v2/formatters/html"
|
||||
"github.com/alecthomas/chroma/v2/lexers"
|
||||
"github.com/alecthomas/chroma/v2/styles"
|
||||
lru "github.com/hashicorp/golang-lru/v2"
|
||||
)
|
||||
|
||||
// don't highlight files larger than this many bytes for performance purposes
|
||||
const sizeLimit = 1024 * 1024
|
||||
|
||||
var (
|
||||
// For custom user mapping
|
||||
highlightMapping = map[string]string{}
|
||||
|
||||
once sync.Once
|
||||
|
||||
cache *lru.TwoQueueCache[string, any]
|
||||
|
||||
githubStyles = styles.Get("github")
|
||||
)
|
||||
|
||||
// NewContext loads custom highlight map from local config
|
||||
func NewContext() {
|
||||
once.Do(func() {
|
||||
highlightMapping = setting.GetHighlightMapping()
|
||||
|
||||
// The size 512 is simply a conservative rule of thumb
|
||||
c, err := lru.New2Q[string, any](512)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
|
||||
}
|
||||
cache = c
|
||||
})
|
||||
}
|
||||
|
||||
// Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
|
||||
func Code(fileName, language, code string) (output template.HTML, lexerName string) {
|
||||
NewContext()
|
||||
|
||||
// diff view newline will be passed as empty, change to literal '\n' so it can be copied
|
||||
// preserve literal newline in blame view
|
||||
if code == "" || code == "\n" {
|
||||
return "\n", ""
|
||||
}
|
||||
|
||||
if len(code) > sizeLimit {
|
||||
return template.HTML(template.HTMLEscapeString(code)), ""
|
||||
}
|
||||
|
||||
var lexer chroma.Lexer
|
||||
|
||||
if len(language) > 0 {
|
||||
lexer = lexers.Get(language)
|
||||
|
||||
if lexer == nil {
|
||||
// Attempt stripping off the '?'
|
||||
if idx := strings.IndexByte(language, '?'); idx > 0 {
|
||||
lexer = lexers.Get(language[:idx])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
if val, ok := highlightMapping[path.Ext(fileName)]; ok {
|
||||
// use mapped value to find lexer
|
||||
lexer = lexers.Get(val)
|
||||
}
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
if l, ok := cache.Get(fileName); ok {
|
||||
lexer = l.(chroma.Lexer)
|
||||
}
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
lexer = lexers.Match(fileName)
|
||||
if lexer == nil {
|
||||
lexer = lexers.Fallback
|
||||
}
|
||||
cache.Add(fileName, lexer)
|
||||
}
|
||||
|
||||
return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
|
||||
}
|
||||
|
||||
// CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
|
||||
func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
|
||||
formatter := html.New(html.WithClasses(true),
|
||||
html.WithLineNumbers(false),
|
||||
html.PreventSurroundingPre(true),
|
||||
)
|
||||
|
||||
htmlbuf := bytes.Buffer{}
|
||||
htmlw := bufio.NewWriter(&htmlbuf)
|
||||
|
||||
iterator, err := lexer.Tokenise(nil, code)
|
||||
if err != nil {
|
||||
log.Error("Can't tokenize code: %v", err)
|
||||
return template.HTML(template.HTMLEscapeString(code))
|
||||
}
|
||||
// style not used for live site but need to pass something
|
||||
err = formatter.Format(htmlw, githubStyles, iterator)
|
||||
if err != nil {
|
||||
log.Error("Can't format code: %v", err)
|
||||
return template.HTML(template.HTMLEscapeString(code))
|
||||
}
|
||||
|
||||
_ = htmlw.Flush()
|
||||
// Chroma will add newlines for certain lexers in order to highlight them properly
|
||||
// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
|
||||
return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
|
||||
}
|
||||
|
||||
// File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
|
||||
func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
|
||||
NewContext()
|
||||
|
||||
if len(code) > sizeLimit {
|
||||
return PlainText(code), "", nil
|
||||
}
|
||||
|
||||
formatter := html.New(html.WithClasses(true),
|
||||
html.WithLineNumbers(false),
|
||||
html.PreventSurroundingPre(true),
|
||||
)
|
||||
|
||||
var lexer chroma.Lexer
|
||||
|
||||
// provided language overrides everything
|
||||
if language != "" {
|
||||
lexer = lexers.Get(language)
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
|
||||
lexer = lexers.Get(val)
|
||||
}
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
guessLanguage := analyze.GetCodeLanguage(fileName, code)
|
||||
|
||||
lexer = lexers.Get(guessLanguage)
|
||||
if lexer == nil {
|
||||
lexer = lexers.Match(fileName)
|
||||
if lexer == nil {
|
||||
lexer = lexers.Fallback
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lexerName := formatLexerName(lexer.Config().Name)
|
||||
|
||||
iterator, err := lexer.Tokenise(nil, string(code))
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("can't tokenize code: %w", err)
|
||||
}
|
||||
|
||||
tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
|
||||
htmlBuf := &bytes.Buffer{}
|
||||
|
||||
lines := make([]template.HTML, 0, len(tokensLines))
|
||||
for _, tokens := range tokensLines {
|
||||
iterator = chroma.Literator(tokens...)
|
||||
err = formatter.Format(htmlBuf, githubStyles, iterator)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("can't format code: %w", err)
|
||||
}
|
||||
lines = append(lines, template.HTML(htmlBuf.String()))
|
||||
htmlBuf.Reset()
|
||||
}
|
||||
|
||||
return lines, lexerName, nil
|
||||
}
|
||||
|
||||
// PlainText returns non-highlighted HTML for code
|
||||
func PlainText(code []byte) []template.HTML {
|
||||
r := bufio.NewReader(bytes.NewReader(code))
|
||||
m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
|
||||
for {
|
||||
content, err := r.ReadString('\n')
|
||||
if err != nil && err != io.EOF {
|
||||
log.Error("failed to read string from buffer: %v", err)
|
||||
break
|
||||
}
|
||||
if content == "" && err == io.EOF {
|
||||
break
|
||||
}
|
||||
s := template.HTML(gohtml.EscapeString(content))
|
||||
m = append(m, s)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func formatLexerName(name string) string {
|
||||
if name == "fallback" {
|
||||
return "Plaintext"
|
||||
}
|
||||
|
||||
return util.ToTitleCaseNoLower(name)
|
||||
}
|
183
modules/highlight/highlight_test.go
Normal file
183
modules/highlight/highlight_test.go
Normal file
@@ -0,0 +1,183 @@
|
||||
// Copyright 2021 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package highlight
|
||||
|
||||
import (
|
||||
"html/template"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// lines builds the expected per-line output for the tests from a compact
// notation: surrounding whitespace is trimmed, real newlines are dropped, and
// every literal `\n` marks an end of line.
//
// "" => [], "a" => ["a"], "a\n" => ["a\n"], "a\nb" => ["a\n", "b"]
// (each line always includes EOL "\n" if it exists)
func lines(s string) (out []template.HTML) {
	out = make([]template.HTML, 0)

	s = strings.TrimSpace(s)
	s = strings.ReplaceAll(s, "\n", "")
	s = strings.ReplaceAll(s, `\n`, "\n")

	// SplitAfter keeps the separator on each piece; only the final piece can be
	// empty, and that one is not a line.
	for _, piece := range strings.SplitAfter(s, "\n") {
		if piece != "" {
			out = append(out, template.HTML(piece))
		}
	}
	return out
}
|
||||
|
||||
func TestFile(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
code string
|
||||
want []template.HTML
|
||||
lexerName string
|
||||
}{
|
||||
{
|
||||
name: "empty.py",
|
||||
code: "",
|
||||
want: lines(""),
|
||||
lexerName: "Python",
|
||||
},
|
||||
{
|
||||
name: "empty.js",
|
||||
code: "",
|
||||
want: lines(""),
|
||||
lexerName: "JavaScript",
|
||||
},
|
||||
{
|
||||
name: "empty.yaml",
|
||||
code: "",
|
||||
want: lines(""),
|
||||
lexerName: "YAML",
|
||||
},
|
||||
{
|
||||
name: "tags.txt",
|
||||
code: "<>",
|
||||
want: lines("<>"),
|
||||
lexerName: "Plaintext",
|
||||
},
|
||||
{
|
||||
name: "tags.py",
|
||||
code: "<>",
|
||||
want: lines(`<span class="o"><</span><span class="o">></span>`),
|
||||
lexerName: "Python",
|
||||
},
|
||||
{
|
||||
name: "eol-no.py",
|
||||
code: "a=1",
|
||||
want: lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>`),
|
||||
lexerName: "Python",
|
||||
},
|
||||
{
|
||||
name: "eol-newline1.py",
|
||||
code: "a=1\n",
|
||||
want: lines(`<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n`),
|
||||
lexerName: "Python",
|
||||
},
|
||||
{
|
||||
name: "eol-newline2.py",
|
||||
code: "a=1\n\n",
|
||||
want: lines(`
|
||||
<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
|
||||
\n
|
||||
`,
|
||||
),
|
||||
lexerName: "Python",
|
||||
},
|
||||
{
|
||||
name: "empty-line-with-space.py",
|
||||
code: strings.ReplaceAll(strings.TrimSpace(`
|
||||
def:
|
||||
a=1
|
||||
|
||||
b=''
|
||||
{space}
|
||||
c=2
|
||||
`), "{space}", " "),
|
||||
want: lines(`
|
||||
<span class="n">def</span><span class="p">:</span>\n
|
||||
<span class="n">a</span><span class="o">=</span><span class="mi">1</span>\n
|
||||
\n
|
||||
<span class="n">b</span><span class="o">=</span><span class="sa"></span><span class="s1">'</span><span class="s1">'</span>\n
|
||||
\n
|
||||
<span class="n">c</span><span class="o">=</span><span class="mi">2</span>`,
|
||||
),
|
||||
lexerName: "Python",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
out, lexerName, err := File(tt.name, "", []byte(tt.code))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.want, out)
|
||||
assert.Equal(t, tt.lexerName, lexerName)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlainText(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
code string
|
||||
want []template.HTML
|
||||
}{
|
||||
{
|
||||
name: "empty.py",
|
||||
code: "",
|
||||
want: lines(""),
|
||||
},
|
||||
{
|
||||
name: "tags.py",
|
||||
code: "<>",
|
||||
want: lines("<>"),
|
||||
},
|
||||
{
|
||||
name: "eol-no.py",
|
||||
code: "a=1",
|
||||
want: lines(`a=1`),
|
||||
},
|
||||
{
|
||||
name: "eol-newline1.py",
|
||||
code: "a=1\n",
|
||||
want: lines(`a=1\n`),
|
||||
},
|
||||
{
|
||||
name: "eol-newline2.py",
|
||||
code: "a=1\n\n",
|
||||
want: lines(`
|
||||
a=1\n
|
||||
\n
|
||||
`),
|
||||
},
|
||||
{
|
||||
name: "empty-line-with-space.py",
|
||||
code: strings.ReplaceAll(strings.TrimSpace(`
|
||||
def:
|
||||
a=1
|
||||
|
||||
b=''
|
||||
{space}
|
||||
c=2
|
||||
`), "{space}", " "),
|
||||
want: lines(`
|
||||
def:\n
|
||||
a=1\n
|
||||
\n
|
||||
b=''\n
|
||||
\n
|
||||
c=2`),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
out := PlainText([]byte(tt.code))
|
||||
assert.Equal(t, tt.want, out)
|
||||
})
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user