appview: parse reference links from markdown body #760

merged
opened by boltless.me targeting master from feat/mentions

Define refResolver, which parses useful data from a markdown body, such as @-mentions and issue/PR/comment references.

Signed-off-by: Seongmin Lee git@boltless.me

Changed files
+470 -33
appview
+172
appview/db/reference.go
···
+
package db
+
+
import (
+
"database/sql"
+
"fmt"
+
"strings"
+
+
"github.com/bluesky-social/indigo/atproto/syntax"
+
"tangled.org/core/api/tangled"
+
"tangled.org/core/appview/models"
+
)
+
+
// FindReferences resolves refLinks to Issue/PR/IssueComment/PullComment ATURIs.
+
// It will ignore missing refLinks.
+
func FindReferences(e Execer, refLinks []models.ReferenceLink) ([]syntax.ATURI, error) {
+
var (
+
issueRefs []models.ReferenceLink
+
pullRefs []models.ReferenceLink
+
)
+
for _, ref := range refLinks {
+
switch ref.Kind {
+
case models.RefKindIssue:
+
issueRefs = append(issueRefs, ref)
+
case models.RefKindPull:
+
pullRefs = append(pullRefs, ref)
+
}
+
}
+
issueUris, err := findIssueReferences(e, issueRefs)
+
if err != nil {
+
return nil, err
+
}
+
pullUris, err := findPullReferences(e, pullRefs)
+
if err != nil {
+
return nil, err
+
}
+
+
return append(issueUris, pullUris...), nil
+
}
+
+
func findIssueReferences(e Execer, refLinks []models.ReferenceLink) ([]syntax.ATURI, error) {
+
if len(refLinks) == 0 {
+
return nil, nil
+
}
+
vals := make([]string, len(refLinks))
+
args := make([]any, 0, len(refLinks)*4)
+
for i, ref := range refLinks {
+
vals[i] = "(?, ?, ?, ?)"
+
args = append(args, ref.Handle, ref.Repo, ref.SubjectId, ref.CommentId)
+
}
+
query := fmt.Sprintf(
+
`with input(owner_did, name, issue_id, comment_id) as (
+
values %s
+
)
+
select
+
i.did, i.rkey,
+
c.did, c.rkey
+
from input inp
+
join repos r
+
on r.did = inp.owner_did
+
and r.name = inp.name
+
join issues i
+
on i.repo_at = r.at_uri
+
and i.issue_id = inp.issue_id
+
left join issue_comments c
+
on inp.comment_id is not null
+
and c.issue_at = i.at_uri
+
and c.id = inp.comment_id
+
`,
+
strings.Join(vals, ","),
+
)
+
rows, err := e.Query(query, args...)
+
if err != nil {
+
return nil, err
+
}
+
defer rows.Close()
+
+
var uris []syntax.ATURI
+
+
for rows.Next() {
+
// Scan rows
+
var issueOwner, issueRkey string
+
var commentOwner, commentRkey sql.NullString
+
var uri syntax.ATURI
+
if err := rows.Scan(&issueOwner, &issueRkey, &commentOwner, &commentRkey); err != nil {
+
return nil, err
+
}
+
if commentOwner.Valid && commentRkey.Valid {
+
uri = syntax.ATURI(fmt.Sprintf(
+
"at://%s/%s/%s",
+
commentOwner.String,
+
tangled.RepoIssueCommentNSID,
+
commentRkey.String,
+
))
+
} else {
+
uri = syntax.ATURI(fmt.Sprintf(
+
"at://%s/%s/%s",
+
issueOwner,
+
tangled.RepoIssueNSID,
+
issueRkey,
+
))
+
}
+
uris = append(uris, uri)
+
}
+
return uris, nil
+
}
+
+
func findPullReferences(e Execer, refLinks []models.ReferenceLink) ([]syntax.ATURI, error) {
+
if len(refLinks) == 0 {
+
return nil, nil
+
}
+
vals := make([]string, len(refLinks))
+
args := make([]any, 0, len(refLinks)*4)
+
for i, ref := range refLinks {
+
vals[i] = "(?, ?, ?, ?)"
+
args = append(args, ref.Handle, ref.Repo, ref.SubjectId, ref.CommentId)
+
}
+
query := fmt.Sprintf(
+
`with input(owner_did, name, pull_id, comment_id) as (
+
values %s
+
)
+
select
+
p.owner_did, p.rkey,
+
c.owner_did, c.rkey
+
from input inp
+
join repos r
+
on r.did = inp.owner_did
+
and r.name = inp.name
+
join pulls p
+
on p.repo_at = r.at_uri
+
and p.pull_id = inp.pull_id
+
left join pull_comments c
+
on inp.comment_id is not null
+
and c.repo_at = r.at_uri and c.pull_id = p.pull_id
+
and c.id = inp.comment_id
+
`,
+
strings.Join(vals, ","),
+
)
+
rows, err := e.Query(query, args...)
+
if err != nil {
+
return nil, err
+
}
+
defer rows.Close()
+
+
var uris []syntax.ATURI
+
+
for rows.Next() {
+
// Scan rows
+
var pullOwner, pullRkey string
+
var commentOwner, commentRkey sql.NullString
+
var uri syntax.ATURI
+
if err := rows.Scan(&pullOwner, &pullRkey, &commentOwner, &commentRkey); err != nil {
+
return nil, err
+
}
+
if commentOwner.Valid && commentRkey.Valid {
+
uri = syntax.ATURI(fmt.Sprintf(
+
"at://%s/%s/%s",
+
commentOwner.String,
+
tangled.RepoPullCommentNSID,
+
commentRkey.String,
+
))
+
} else {
+
uri = syntax.ATURI(fmt.Sprintf(
+
"at://%s/%s/%s",
+
pullOwner,
+
tangled.RepoPullNSID,
+
pullRkey,
+
))
+
}
+
uris = append(uris, uri)
+
}
+
return uris, nil
+
}
+10 -20
appview/issues/issues.go
···
"tangled.org/core/appview/notify"
"tangled.org/core/appview/oauth"
"tangled.org/core/appview/pages"
-
"tangled.org/core/appview/pages/markup"
"tangled.org/core/appview/pagination"
+
"tangled.org/core/appview/refresolver"
"tangled.org/core/appview/reporesolver"
"tangled.org/core/appview/validator"
"tangled.org/core/idresolver"
···
repoResolver *reporesolver.RepoResolver
pages *pages.Pages
idResolver *idresolver.Resolver
+
refResolver *refresolver.Resolver
db *db.DB
config *config.Config
notifier notify.Notifier
···
repoResolver *reporesolver.RepoResolver,
pages *pages.Pages,
idResolver *idresolver.Resolver,
+
refResolver *refresolver.Resolver,
db *db.DB,
config *config.Config,
notifier notify.Notifier,
···
repoResolver: repoResolver,
pages: pages,
idResolver: idResolver,
+
refResolver: refResolver,
db: db,
config: config,
notifier: notifier,
···
replyTo = &replyToUri
}
+
mentions, _ := rp.refResolver.Resolve(r.Context(), body)
+
comment := models.IssueComment{
Did: user.Did,
Rkey: tid.TID(),
···
// notify about the new comment
comment.Id = commentId
-
rawMentions := markup.FindUserMentions(comment.Body)
-
idents := rp.idResolver.ResolveIdents(r.Context(), rawMentions)
-
l.Debug("parsed mentions", "raw", rawMentions, "idents", idents)
-
var mentions []syntax.DID
-
for _, ident := range idents {
-
if ident != nil && !ident.Handle.IsInvalidHandle() {
-
mentions = append(mentions, ident.DID)
-
}
-
}
rp.notifier.NewIssueComment(r.Context(), &comment, mentions)
rp.pages.HxLocation(w, fmt.Sprintf("/%s/issues/%d#comment-%d", f.OwnerSlashRepo(), issue.IssueId, commentId))
···
RepoInfo: f.RepoInfo(user),
})
case http.MethodPost:
+
body := r.FormValue("body")
+
mentions, _ := rp.refResolver.Resolve(r.Context(), body)
+
issue := &models.Issue{
RepoAt: f.RepoAt(),
Rkey: tid.TID(),
Title: r.FormValue("title"),
-
Body: r.FormValue("body"),
+
Body: body,
Open: true,
Did: user.Did,
Created: time.Now(),
···
// everything is successful, do not rollback the atproto record
atUri = ""
-
rawMentions := markup.FindUserMentions(issue.Body)
-
idents := rp.idResolver.ResolveIdents(r.Context(), rawMentions)
-
l.Debug("parsed mentions", "raw", rawMentions, "idents", idents)
-
var mentions []syntax.DID
-
for _, ident := range idents {
-
if ident != nil && !ident.Handle.IsInvalidHandle() {
-
mentions = append(mentions, ident.DID)
-
}
-
}
rp.notifier.NewIssue(r.Context(), issue, mentions)
rp.pages.HxLocation(w, fmt.Sprintf("/%s/issues/%d", f.OwnerSlashRepo(), issue.IssueId))
return
+18
appview/models/reference.go
···
+
package models
+
+
// RefKind tells which kind of record a ReferenceLink points into.
type RefKind int

const (
	// RefKindIssue marks a link under a repo's "issues" path.
	RefKindIssue RefKind = iota
	// RefKindPull marks a link under a repo's "pulls" path.
	RefKindPull
)
+
+
// /@alice.com/cool-proj/issues/123
+
// /@alice.com/cool-proj/issues/123#comment-321
+
type ReferenceLink struct {
+
Handle string
+
Repo string
+
Kind RefKind
+
SubjectId int
+
CommentId *int
+
}
+3 -3
appview/pages/markup/extension/atlink.go
···
// An AtNode struct represents an AtNode
type AtNode struct {
-
handle string
+
Handle string
ast.BaseInline
}
···
block.Advance(m[1])
node := &AtNode{}
node.AppendChild(node, ast.NewTextSegment(atSegment))
-
node.handle = string(atSegment.Value(block.Source())[1:])
+
node.Handle = string(atSegment.Value(block.Source())[1:])
return node
}
···
func (r *atHtmlRenderer) renderAt(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
if entering {
w.WriteString(`<a href="/@`)
-
w.WriteString(n.(*AtNode).handle)
+
w.WriteString(n.(*AtNode).Handle)
w.WriteString(`" class="mention">`)
} else {
w.WriteString("</a>")
+24
appview/pages/markup/markdown.go
···
return md
}
+
// FindUserMentions returns Set of user handles from given markup soruce.
+
// It doesn't guarntee unique DIDs
+
func FindUserMentions(source string) []string {
+
var (
+
mentions []string
+
mentionsSet = make(map[string]struct{})
+
md = NewMarkdown()
+
sourceBytes = []byte(source)
+
root = md.Parser().Parse(text.NewReader(sourceBytes))
+
)
+
ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
+
if entering && n.Kind() == textension.KindAt {
+
handle := n.(*textension.AtNode).Handle
+
mentionsSet[handle] = struct{}{}
+
return ast.WalkSkipChildren, nil
+
}
+
return ast.WalkContinue, nil
+
})
+
for handle := range mentionsSet {
+
mentions = append(mentions, handle)
+
}
+
return mentions
+
}
+
func (rctx *RenderContext) RenderMarkdown(source string) string {
md := NewMarkdown()
+123
appview/pages/markup/reference_link.go
···
+
package markup
+
+
import (
+
"maps"
+
"net/url"
+
"path"
+
"slices"
+
"strconv"
+
"strings"
+
+
"github.com/yuin/goldmark/ast"
+
"github.com/yuin/goldmark/text"
+
"tangled.org/core/appview/models"
+
)
+
+
// FindReferences collects all links referencing tangled-related objects
+
// like issues, PRs, comments or even @-mentions
+
// This funciton doesn't actually check for the existence of records in the DB
+
// or the PDS; it merely returns a list of what are presumed to be references.
+
func FindReferences(baseUrl string, source string) ([]string, []models.ReferenceLink) {
+
var (
+
refLinkSet = make(map[models.ReferenceLink]struct{})
+
mentionsSet = make(map[string]struct{})
+
md = NewMarkdown()
+
sourceBytes = []byte(source)
+
root = md.Parser().Parse(text.NewReader(sourceBytes))
+
)
+
// trim url scheme. the SSL shouldn't matter
+
baseUrl = strings.TrimPrefix(baseUrl, "https://")
+
baseUrl = strings.TrimPrefix(baseUrl, "http://")
+
+
ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
+
if !entering {
+
return ast.WalkContinue, nil
+
}
+
switch n.Kind() {
+
case KindAt:
+
handle := n.(*AtNode).handle
+
mentionsSet[handle] = struct{}{}
+
return ast.WalkSkipChildren, nil
+
case ast.KindLink:
+
dest := string(n.(*ast.Link).Destination)
+
ref := parseTangledLink(baseUrl, dest)
+
if ref != nil {
+
refLinkSet[*ref] = struct{}{}
+
}
+
return ast.WalkSkipChildren, nil
+
case ast.KindAutoLink:
+
an := n.(*ast.AutoLink)
+
if an.AutoLinkType == ast.AutoLinkURL {
+
dest := string(an.URL(sourceBytes))
+
ref := parseTangledLink(baseUrl, dest)
+
if ref != nil {
+
refLinkSet[*ref] = struct{}{}
+
}
+
}
+
return ast.WalkSkipChildren, nil
+
}
+
return ast.WalkContinue, nil
+
})
+
mentions := slices.Collect(maps.Keys(mentionsSet))
+
references := slices.Collect(maps.Keys(refLinkSet))
+
return mentions, references
+
}
+
+
func parseTangledLink(baseHost string, urlStr string) *models.ReferenceLink {
+
u, err := url.Parse(urlStr)
+
if err != nil {
+
return nil
+
}
+
+
if u.Host != "" && !strings.EqualFold(u.Host, baseHost) {
+
return nil
+
}
+
+
p := path.Clean(u.Path)
+
parts := strings.FieldsFunc(p, func(r rune) bool { return r == '/' })
+
if len(parts) < 4 {
+
// need at least: handle / repo / kind / id
+
return nil
+
}
+
+
var (
+
handle = parts[0]
+
repo = parts[1]
+
kindSeg = parts[2]
+
subjectSeg = parts[3]
+
)
+
+
handle = strings.TrimPrefix(handle, "@")
+
+
var kind models.RefKind
+
switch kindSeg {
+
case "issues":
+
kind = models.RefKindIssue
+
case "pulls":
+
kind = models.RefKindPull
+
default:
+
return nil
+
}
+
+
subjectId, err := strconv.Atoi(subjectSeg)
+
if err != nil {
+
return nil
+
}
+
var commentId *int
+
if u.Fragment != "" {
+
if strings.HasPrefix(u.Fragment, "comment-") {
+
commentIdStr := u.Fragment[len("comment-"):]
+
if id, err := strconv.Atoi(commentIdStr); err == nil {
+
commentId = &id
+
}
+
}
+
}
+
+
return &models.ReferenceLink{
+
Handle: handle,
+
Repo: repo,
+
Kind: kind,
+
SubjectId: subjectId,
+
CommentId: commentId,
+
}
+
}
+6 -10
appview/pulls/pulls.go
···
"tangled.org/core/appview/oauth"
"tangled.org/core/appview/pages"
"tangled.org/core/appview/pages/markup"
+
"tangled.org/core/appview/refresolver"
"tangled.org/core/appview/reporesolver"
"tangled.org/core/appview/validator"
"tangled.org/core/appview/xrpcclient"
···
repoResolver *reporesolver.RepoResolver
pages *pages.Pages
idResolver *idresolver.Resolver
+
refResolver *refresolver.Resolver
db *db.DB
config *config.Config
notifier notify.Notifier
···
repoResolver *reporesolver.RepoResolver,
pages *pages.Pages,
resolver *idresolver.Resolver,
+
refResolver *refresolver.Resolver,
db *db.DB,
config *config.Config,
notifier notify.Notifier,
···
repoResolver: repoResolver,
pages: pages,
idResolver: resolver,
+
refResolver: refResolver,
db: db,
config: config,
notifier: notifier,
···
}
func (s *Pulls) PullComment(w http.ResponseWriter, r *http.Request) {
-
l := s.logger.With("handler", "PullComment")
user := s.oauth.GetUser(r)
f, err := s.repoResolver.Resolve(r)
if err != nil {
···
return
}
+
mentions, _ := s.refResolver.Resolve(r.Context(), body)
+
// Start a transaction
tx, err := s.db.BeginTx(r.Context(), nil)
if err != nil {
···
return
}
-
rawMentions := markup.FindUserMentions(comment.Body)
-
idents := s.idResolver.ResolveIdents(r.Context(), rawMentions)
-
l.Debug("parsed mentions", "raw", rawMentions, "idents", idents)
-
var mentions []syntax.DID
-
for _, ident := range idents {
-
if ident != nil && !ident.Handle.IsInvalidHandle() {
-
mentions = append(mentions, ident.DID)
-
}
-
}
s.notifier.NewPullComment(r.Context(), comment, mentions)
s.pages.HxLocation(w, fmt.Sprintf("/%s/pulls/%d#comment-%d", f.OwnerSlashRepo(), pull.PullId, commentId))
+65
appview/refresolver/resolver.go
···
+
package refresolver
+
+
import (
+
"context"
+
"log/slog"
+
+
"github.com/bluesky-social/indigo/atproto/syntax"
+
"tangled.org/core/appview/config"
+
"tangled.org/core/appview/db"
+
"tangled.org/core/appview/models"
+
"tangled.org/core/appview/pages/markup"
+
"tangled.org/core/idresolver"
+
)
+
+
type Resolver struct {
+
config *config.Config
+
idResolver *idresolver.Resolver
+
execer db.Execer
+
logger *slog.Logger
+
}
+
+
func New(
+
config *config.Config,
+
idResolver *idresolver.Resolver,
+
execer db.Execer,
+
logger *slog.Logger,
+
) *Resolver {
+
return &Resolver{
+
config,
+
idResolver,
+
execer,
+
logger,
+
}
+
}
+
+
func (r *Resolver) Resolve(ctx context.Context, source string) ([]syntax.DID, []syntax.ATURI) {
+
l := r.logger.With("method", "find_references")
+
rawMentions, rawRefs := markup.FindReferences(r.config.Core.AppviewHost, source)
+
l.Debug("found possible references", "mentions", rawMentions, "refs", rawRefs)
+
idents := r.idResolver.ResolveIdents(ctx, rawMentions)
+
var mentions []syntax.DID
+
for _, ident := range idents {
+
if ident != nil && !ident.Handle.IsInvalidHandle() {
+
mentions = append(mentions, ident.DID)
+
}
+
}
+
l.Debug("found mentions", "mentions", mentions)
+
+
var resolvedRefs []models.ReferenceLink
+
for _, rawRef := range rawRefs {
+
ident, err := r.idResolver.ResolveIdent(ctx, rawRef.Handle)
+
if err != nil || ident == nil || ident.Handle.IsInvalidHandle() {
+
continue
+
}
+
rawRef.Handle = string(ident.DID)
+
resolvedRefs = append(resolvedRefs, rawRef)
+
}
+
aturiRefs, err := db.FindReferences(r.execer, resolvedRefs)
+
if err != nil {
+
l.Error("failed running query", "err", err)
+
}
+
l.Debug("found references", "refs", aturiRefs)
+
+
return mentions, aturiRefs
+
}
+2
appview/state/router.go
···
s.repoResolver,
s.pages,
s.idResolver,
+
s.refResolver,
s.db,
s.config,
s.notifier,
···
s.repoResolver,
s.pages,
s.idResolver,
+
s.refResolver,
s.db,
s.config,
s.notifier,
+5
appview/state/state.go
···
phnotify "tangled.org/core/appview/notify/posthog"
"tangled.org/core/appview/oauth"
"tangled.org/core/appview/pages"
+
"tangled.org/core/appview/refresolver"
"tangled.org/core/appview/reporesolver"
"tangled.org/core/appview/validator"
xrpcclient "tangled.org/core/appview/xrpcclient"
···
enforcer *rbac.Enforcer
pages *pages.Pages
idResolver *idresolver.Resolver
+
refResolver *refresolver.Resolver
posthog posthog.Client
jc *jetstream.JetstreamClient
config *config.Config
···
repoResolver := reporesolver.New(config, enforcer, res, d)
+
refResolver := refresolver.New(config, res, d, log.SubLogger(logger, "refResolver"))
+
wrapper := db.DbWrapper{Execer: d}
jc, err := jetstream.NewJetstreamClient(
config.Jetstream.Endpoint,
···
enforcer,
pages,
res,
+
refResolver,
posthog,
jc,
config,