Files
Gitea/modules/markup/html_node.go
Gregorius Bima Kharisma Wicaksana f9d3983de2 fix: generate IDs for HTML headings without id attribute (#36233)
This PR fixes #27383 where HTML headings like `<h1>Title</h1>` in
markdown files would have empty permalink anchors

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
2026-01-06 05:09:44 +00:00

167 lines
5.2 KiB
Go

// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markup
import (
"strings"
"code.gitea.io/gitea/modules/markup/common"
"golang.org/x/net/html"
)
func isAnchorIDUserContent(s string) bool {
// blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote
// old logic: blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
return strings.HasPrefix(s, "user-content-") || strings.Contains(s, ":user-content-")
}
func isAnchorIDFootnote(s string) bool {
return strings.HasPrefix(s, "fnref:user-content-") || strings.HasPrefix(s, "fn:user-content-")
}
func isAnchorHrefFootnote(s string) bool {
return strings.HasPrefix(s, "#fnref:user-content-") || strings.HasPrefix(s, "#fn:user-content-")
}
// isHeadingTag returns true if the node is a heading tag (h1-h6)
func isHeadingTag(node *html.Node) bool {
return node.Type == html.ElementNode &&
len(node.Data) == 2 &&
node.Data[0] == 'h' &&
node.Data[1] >= '1' && node.Data[1] <= '6'
}
// getNodeText extracts the text content from a node and its children
func getNodeText(node *html.Node) string {
var text strings.Builder
var extractText func(*html.Node)
extractText = func(n *html.Node) {
if n.Type == html.TextNode {
text.WriteString(n.Data)
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
extractText(c)
}
}
extractText(node)
return text.String()
}
func processNodeAttrID(ctx *RenderContext, node *html.Node) {
// Add user-content- to IDs and "#" links if they don't already have them,
// and convert the link href to a relative link to the host root
hasID := false
for idx, attr := range node.Attr {
if attr.Key == "id" {
hasID = true
if !isAnchorIDUserContent(attr.Val) {
node.Attr[idx].Val = "user-content-" + attr.Val
}
}
}
// For heading tags (h1-h6) without an id attribute, generate one from the text content.
// This ensures HTML headings like <h1>Title</h1> get proper permalink anchors
// matching the behavior of Markdown headings.
// Only enabled for repository files and wiki pages via EnableHeadingIDGeneration option.
if !hasID && isHeadingTag(node) && ctx.RenderOptions.EnableHeadingIDGeneration {
text := getNodeText(node)
if text != "" {
// Use the same CleanValue function used by Markdown heading ID generation
cleanedID := string(common.CleanValue([]byte(text)))
if cleanedID != "" {
node.Attr = append(node.Attr, html.Attribute{Key: "id", Val: "user-content-" + cleanedID})
}
}
}
}
func processFootnoteNode(ctx *RenderContext, node *html.Node) {
for idx, attr := range node.Attr {
if (attr.Key == "id" && isAnchorIDFootnote(attr.Val)) ||
(attr.Key == "href" && isAnchorHrefFootnote(attr.Val)) {
if footnoteContextID := ctx.RenderOptions.Metas["footnoteContextId"]; footnoteContextID != "" {
node.Attr[idx].Val = attr.Val + "-" + footnoteContextID
}
continue
}
}
}
func processNodeA(ctx *RenderContext, node *html.Node) {
for idx, attr := range node.Attr {
if attr.Key == "href" {
if anchorID, ok := strings.CutPrefix(attr.Val, "#"); ok {
if !isAnchorIDUserContent(attr.Val) {
node.Attr[idx].Val = "#user-content-" + anchorID
}
} else {
node.Attr[idx].Val = ctx.RenderHelper.ResolveLink(attr.Val, LinkTypeDefault)
}
}
}
}
func visitNodeImg(ctx *RenderContext, img *html.Node) (next *html.Node) {
next = img.NextSibling
attrSrc, hasLazy := "", false
for i, imgAttr := range img.Attr {
hasLazy = hasLazy || imgAttr.Key == "loading" && imgAttr.Val == "lazy"
if imgAttr.Key != "src" {
attrSrc = imgAttr.Val
continue
}
imgSrcOrigin := imgAttr.Val
isLinkable := imgSrcOrigin != "" && !strings.HasPrefix(imgSrcOrigin, "data:")
// By default, the "<img>" tag should also be clickable,
// because frontend uses `<img>` to paste the re-scaled image into the Markdown,
// so it must match the default Markdown image behavior.
cnt := 0
for p := img.Parent; isLinkable && p != nil && cnt < 2; p = p.Parent {
if hasParentAnchor := p.Type == html.ElementNode && p.Data == "a"; hasParentAnchor {
isLinkable = false
break
}
cnt++
}
if isLinkable {
wrapper := &html.Node{Type: html.ElementNode, Data: "a", Attr: []html.Attribute{
{Key: "href", Val: ctx.RenderHelper.ResolveLink(imgSrcOrigin, LinkTypeDefault)},
{Key: "target", Val: "_blank"},
}}
parent := img.Parent
imgNext := img.NextSibling
parent.RemoveChild(img)
parent.InsertBefore(wrapper, imgNext)
wrapper.AppendChild(img)
}
imgAttr.Val = ctx.RenderHelper.ResolveLink(imgSrcOrigin, LinkTypeMedia)
imgAttr.Val = camoHandleLink(imgAttr.Val)
img.Attr[i] = imgAttr
}
if !RenderBehaviorForTesting.DisableAdditionalAttributes && !hasLazy && !strings.HasPrefix(attrSrc, "data:") {
img.Attr = append(img.Attr, html.Attribute{Key: "loading", Val: "lazy"})
}
return next
}
func visitNodeVideo(ctx *RenderContext, node *html.Node) (next *html.Node) {
next = node.NextSibling
for i, attr := range node.Attr {
if attr.Key != "src" {
continue
}
if IsNonEmptyRelativePath(attr.Val) {
attr.Val = ctx.RenderHelper.ResolveLink(attr.Val, LinkTypeMedia)
}
attr.Val = camoHandleLink(attr.Val)
node.Attr[i] = attr
}
return next
}