first-commit

This commit is contained in:
2025-08-25 15:46:12 +08:00
commit f4d95dfff4
5665 changed files with 705359 additions and 0 deletions

186
modules/emoji/emoji.go Normal file
View File

@@ -0,0 +1,186 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Copyright 2015 Kenneth Shaw
// SPDX-License-Identifier: MIT
package emoji
import (
"io"
"sort"
"strings"
"sync"
)
// Gemoji is a set of emoji data.
type Gemoji []Emoji
// Emoji represents a single emoji and associated data.
type Emoji struct {
Emoji string
Description string
Aliases []string
UnicodeVersion string
SkinTones bool
}
var (
// codeMap provides a map of the emoji unicode code to its emoji data.
codeMap map[string]int
// aliasMap provides a map of the alias to its emoji data.
aliasMap map[string]int
// emptyReplacer is the string replacer for emoji codes.
emptyReplacer *strings.Replacer
// codeReplacer is the string replacer for emoji codes.
codeReplacer *strings.Replacer
// aliasReplacer is the string replacer for emoji aliases.
aliasReplacer *strings.Replacer
once sync.Once
)
func loadMap() {
once.Do(func() {
// initialize
codeMap = make(map[string]int, len(GemojiData))
aliasMap = make(map[string]int, len(GemojiData))
// process emoji codes and aliases
codePairs := make([]string, 0)
emptyPairs := make([]string, 0)
aliasPairs := make([]string, 0)
// sort from largest to small so we match combined emoji first
sort.Slice(GemojiData, func(i, j int) bool {
return len(GemojiData[i].Emoji) > len(GemojiData[j].Emoji)
})
for i, e := range GemojiData {
if e.Emoji == "" || len(e.Aliases) == 0 {
continue
}
// setup codes
codeMap[e.Emoji] = i
codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
// setup aliases
for _, a := range e.Aliases {
if a == "" {
continue
}
aliasMap[a] = i
aliasPairs = append(aliasPairs, ":"+a+":", e.Emoji)
}
}
// create replacers
emptyReplacer = strings.NewReplacer(emptyPairs...)
codeReplacer = strings.NewReplacer(codePairs...)
aliasReplacer = strings.NewReplacer(aliasPairs...)
})
}
// FromCode retrieves the emoji data based on the provided unicode code (ie,
// "\u2618" will return the Gemoji data for "shamrock").
func FromCode(code string) *Emoji {
loadMap()
i, ok := codeMap[code]
if !ok {
return nil
}
return &GemojiData[i]
}
// FromAlias retrieves the emoji data based on the provided alias in the form
// "alias" or ":alias:" (ie, "shamrock" or ":shamrock:" will return the Gemoji
// data for "shamrock").
func FromAlias(alias string) *Emoji {
loadMap()
if strings.HasPrefix(alias, ":") && strings.HasSuffix(alias, ":") {
alias = alias[1 : len(alias)-1]
}
i, ok := aliasMap[alias]
if !ok {
return nil
}
return &GemojiData[i]
}
// ReplaceCodes replaces all emoji codes with the first corresponding emoji
// alias (in the form of ":alias:") (ie, "\u2618" will be converted to
// ":shamrock:").
func ReplaceCodes(s string) string {
loadMap()
return codeReplacer.Replace(s)
}
// ReplaceAliases replaces all aliases of the form ":alias:" with its
// corresponding unicode value.
func ReplaceAliases(s string) string {
loadMap()
return aliasReplacer.Replace(s)
}
type rememberSecondWriteWriter struct {
pos int
idx int
end int
writecount int
}
func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
n.writecount++
if n.writecount == 2 {
n.idx = n.pos
n.end = n.pos + len(p)
n.pos += len(p)
return len(p), io.EOF
}
n.pos += len(p)
return len(p), nil
}
func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
n.writecount++
if n.writecount == 2 {
n.idx = n.pos
n.end = n.pos + len(s)
n.pos += len(s)
return len(s), io.EOF
}
n.pos += len(s)
return len(s), nil
}
// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
func FindEmojiSubmatchIndex(s string) []int {
loadMap()
secondWriteWriter := rememberSecondWriteWriter{}
// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
// we can be lazy here.
//
// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
// submatch is simply the second thing that is written to WriteString in the writer.
//
// Therefore we can simply take the index of the second write as our first emoji
//
// FIXME: just copy the trie implementation from strings.NewReplacer
_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
// if we wrote less than twice then we never "replaced"
if secondWriteWriter.writecount < 2 {
return nil
}
return []int{secondWriteWriter.idx, secondWriteWriter.end}
}

3404
modules/emoji/emoji_data.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,82 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Copyright 2015 Kenneth Shaw
// SPDX-License-Identifier: MIT
package emoji
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestDumpInfo(t *testing.T) {
t.Logf("codes: %d", len(codeMap))
t.Logf("aliases: %d", len(aliasMap))
}
func TestLookup(t *testing.T) {
a := FromCode("\U0001f37a")
b := FromCode("🍺")
c := FromAlias(":beer:")
d := FromAlias("beer")
assert.Equal(t, a, b)
assert.Equal(t, b, c)
assert.Equal(t, c, d)
assert.Equal(t, a, d)
m := FromCode("\U0001f44d")
n := FromAlias(":thumbsup:")
o := FromAlias("+1")
assert.Equal(t, m, n)
assert.Equal(t, m, o)
assert.Equal(t, n, o)
}
func TestReplacers(t *testing.T) {
tests := []struct {
f func(string) string
v, exp string
}{
{ReplaceCodes, ":thumbsup: +1 for \U0001f37a! 🍺 \U0001f44d", ":thumbsup: +1 for :beer:! :beer: :+1:"},
{ReplaceAliases, ":thumbsup: +1 :+1: :beer:", "\U0001f44d +1 \U0001f44d \U0001f37a"},
}
for i, x := range tests {
s := x.f(x.v)
assert.Equalf(t, x.exp, s, "test %d `%s` expected `%s`, got: `%s`", i, x.v, x.exp, s)
}
}
func TestFindEmojiSubmatchIndex(t *testing.T) {
type testcase struct {
teststring string
expected []int
}
testcases := []testcase{
{
"\U0001f44d",
[]int{0, len("\U0001f44d")},
},
{
"\U0001f44d +1 \U0001f44d \U0001f37a",
[]int{0, 4},
},
{
" \U0001f44d",
[]int{1, 1 + len("\U0001f44d")},
},
{
string([]byte{'\u0001'}) + "\U0001f44d",
[]int{1, 1 + len("\U0001f44d")},
},
}
for _, kase := range testcases {
actual := FindEmojiSubmatchIndex(kase.teststring)
assert.Equal(t, kase.expected, actual)
}
}