From 45d9266c27a58584c6bf2a51cd9ab90ad7e6d686 Mon Sep 17 00:00:00 2001 From: Seongmin Lee Date: Wed, 5 Nov 2025 00:26:45 +0900 Subject: [PATCH] appview: parse reference links from markdown body Change-Id: ozvsznuvkvwwqovyooksxuovlzkwlzul Defined `refResolver` which will parse useful data from markdown body like @-mentions or issue/pr/comment mentions Signed-off-by: Seongmin Lee --- appview/db/reference.go | 172 ++++++++++++++++++++ appview/issues/issues.go | 30 ++-- appview/models/reference.go | 18 ++ appview/pages/markup/markdown.go | 24 --- appview/pages/markup/reference_link.go | 124 ++++++++++++++ appview/pages/markup/reference_link_test.go | 42 +++++ appview/pulls/pulls.go | 16 +- appview/refresolver/resolver.go | 65 ++++++++ appview/state/router.go | 2 + appview/state/state.go | 5 + 10 files changed, 444 insertions(+), 54 deletions(-) create mode 100644 appview/db/reference.go create mode 100644 appview/models/reference.go create mode 100644 appview/pages/markup/reference_link.go create mode 100644 appview/pages/markup/reference_link_test.go create mode 100644 appview/refresolver/resolver.go diff --git a/appview/db/reference.go b/appview/db/reference.go new file mode 100644 index 00000000..19a3a0f0 --- /dev/null +++ b/appview/db/reference.go @@ -0,0 +1,172 @@ +package db + +import ( + "database/sql" + "fmt" + "strings" + + "github.com/bluesky-social/indigo/atproto/syntax" + "tangled.org/core/api/tangled" + "tangled.org/core/appview/models" +) + +// FindReferences resolves refLinks to Issue/PR/IssueComment/PullComment ATURIs. +// It will ignore missing refLinks. 
+func FindReferences(e Execer, refLinks []models.ReferenceLink) ([]syntax.ATURI, error) { + var ( + issueRefs []models.ReferenceLink + pullRefs []models.ReferenceLink + ) + for _, ref := range refLinks { + switch ref.Kind { + case models.RefKindIssue: + issueRefs = append(issueRefs, ref) + case models.RefKindPull: + pullRefs = append(pullRefs, ref) + } + } + issueUris, err := findIssueReferences(e, issueRefs) + if err != nil { + return nil, err + } + pullUris, err := findPullReferences(e, pullRefs) + if err != nil { + return nil, err + } + + return append(issueUris, pullUris...), nil +} + +func findIssueReferences(e Execer, refLinks []models.ReferenceLink) ([]syntax.ATURI, error) { + if len(refLinks) == 0 { + return nil, nil + } + vals := make([]string, len(refLinks)) + args := make([]any, 0, len(refLinks)*4) + for i, ref := range refLinks { + vals[i] = "(?, ?, ?, ?)" + args = append(args, ref.Handle, ref.Repo, ref.SubjectId, ref.CommentId) + } + query := fmt.Sprintf( + `with input(owner_did, name, issue_id, comment_id) as ( + values %s + ) + select + i.did, i.rkey, + c.did, c.rkey + from input inp + join repos r + on r.did = inp.owner_did + and r.name = inp.name + join issues i + on i.repo_at = r.at_uri + and i.issue_id = inp.issue_id + left join issue_comments c + on inp.comment_id is not null + and c.issue_at = i.at_uri + and c.id = inp.comment_id + `, + strings.Join(vals, ","), + ) + rows, err := e.Query(query, args...) 
+ if err != nil { + return nil, err + } + defer rows.Close() + + var uris []syntax.ATURI + + for rows.Next() { + // Scan rows + var issueOwner, issueRkey string + var commentOwner, commentRkey sql.NullString + var uri syntax.ATURI + if err := rows.Scan(&issueOwner, &issueRkey, &commentOwner, &commentRkey); err != nil { + return nil, err + } + if commentOwner.Valid && commentRkey.Valid { + uri = syntax.ATURI(fmt.Sprintf( + "at://%s/%s/%s", + commentOwner.String, + tangled.RepoIssueCommentNSID, + commentRkey.String, + )) + } else { + uri = syntax.ATURI(fmt.Sprintf( + "at://%s/%s/%s", + issueOwner, + tangled.RepoIssueNSID, + issueRkey, + )) + } + uris = append(uris, uri) + } + return uris, nil +} + +func findPullReferences(e Execer, refLinks []models.ReferenceLink) ([]syntax.ATURI, error) { + if len(refLinks) == 0 { + return nil, nil + } + vals := make([]string, len(refLinks)) + args := make([]any, 0, len(refLinks)*4) + for i, ref := range refLinks { + vals[i] = "(?, ?, ?, ?)" + args = append(args, ref.Handle, ref.Repo, ref.SubjectId, ref.CommentId) + } + query := fmt.Sprintf( + `with input(owner_did, name, pull_id, comment_id) as ( + values %s + ) + select + p.owner_did, p.rkey, + c.owner_did, c.rkey + from input inp + join repos r + on r.did = inp.owner_did + and r.name = inp.name + join pulls p + on p.repo_at = r.at_uri + and p.pull_id = inp.pull_id + left join pull_comments c + on inp.comment_id is not null + and c.repo_at = r.at_uri and c.pull_id = p.pull_id + and c.id = inp.comment_id + `, + strings.Join(vals, ","), + ) + rows, err := e.Query(query, args...) 
+ if err != nil { + return nil, err + } + defer rows.Close() + + var uris []syntax.ATURI + + for rows.Next() { + // Scan rows + var pullOwner, pullRkey string + var commentOwner, commentRkey sql.NullString + var uri syntax.ATURI + if err := rows.Scan(&pullOwner, &pullRkey, &commentOwner, &commentRkey); err != nil { + return nil, err + } + if commentOwner.Valid && commentRkey.Valid { + uri = syntax.ATURI(fmt.Sprintf( + "at://%s/%s/%s", + commentOwner.String, + tangled.RepoPullCommentNSID, + commentRkey.String, + )) + } else { + uri = syntax.ATURI(fmt.Sprintf( + "at://%s/%s/%s", + pullOwner, + tangled.RepoPullNSID, + pullRkey, + )) + } + uris = append(uris, uri) + } + return uris, nil +} diff --git a/appview/issues/issues.go b/appview/issues/issues.go index 5bf5199a..a720bd4e 100644 --- a/appview/issues/issues.go +++ b/appview/issues/issues.go @@ -24,8 +24,8 @@ import ( "tangled.org/core/appview/notify" "tangled.org/core/appview/oauth" "tangled.org/core/appview/pages" - "tangled.org/core/appview/pages/markup" "tangled.org/core/appview/pagination" + "tangled.org/core/appview/refresolver" "tangled.org/core/appview/reporesolver" "tangled.org/core/appview/validator" "tangled.org/core/idresolver" @@ -37,6 +37,7 @@ type Issues struct { repoResolver *reporesolver.RepoResolver pages *pages.Pages idResolver *idresolver.Resolver + refResolver *refresolver.Resolver db *db.DB config *config.Config notifier notify.Notifier @@ -50,6 +51,7 @@ func New( repoResolver *reporesolver.RepoResolver, pages *pages.Pages, idResolver *idresolver.Resolver, + refResolver *refresolver.Resolver, db *db.DB, config *config.Config, notifier notify.Notifier, @@ -62,6 +64,7 @@ func New( repoResolver: repoResolver, pages: pages, idResolver: idResolver, + refResolver: refResolver, db: db, config: config, notifier: notifier, @@ -399,6 +402,8 @@ func (rp *Issues) NewIssueComment(w http.ResponseWriter, r *http.Request) { replyTo = &replyToUri } + mentions, _ := rp.refResolver.Resolve(r.Context(), body) + 
comment := models.IssueComment{ Did: user.Did, Rkey: tid.TID(), @@ -455,15 +460,6 @@ func (rp *Issues) NewIssueComment(w http.ResponseWriter, r *http.Request) { // notify about the new comment comment.Id = commentId - rawMentions := markup.FindUserMentions(comment.Body) - idents := rp.idResolver.ResolveIdents(r.Context(), rawMentions) - l.Debug("parsed mentions", "raw", rawMentions, "idents", idents) - var mentions []syntax.DID - for _, ident := range idents { - if ident != nil && !ident.Handle.IsInvalidHandle() { - mentions = append(mentions, ident.DID) - } - } rp.notifier.NewIssueComment(r.Context(), &comment, mentions) rp.pages.HxLocation(w, fmt.Sprintf("/%s/issues/%d#comment-%d", f.OwnerSlashRepo(), issue.IssueId, commentId)) @@ -884,11 +880,14 @@ func (rp *Issues) NewIssue(w http.ResponseWriter, r *http.Request) { RepoInfo: f.RepoInfo(user), }) case http.MethodPost: + body := r.FormValue("body") + mentions, _ := rp.refResolver.Resolve(r.Context(), body) + issue := &models.Issue{ RepoAt: f.RepoAt(), Rkey: tid.TID(), Title: r.FormValue("title"), - Body: r.FormValue("body"), + Body: body, Open: true, Did: user.Did, Created: time.Now(), @@ -960,15 +959,6 @@ func (rp *Issues) NewIssue(w http.ResponseWriter, r *http.Request) { // everything is successful, do not rollback the atproto record atUri = "" - rawMentions := markup.FindUserMentions(issue.Body) - idents := rp.idResolver.ResolveIdents(r.Context(), rawMentions) - l.Debug("parsed mentions", "raw", rawMentions, "idents", idents) - var mentions []syntax.DID - for _, ident := range idents { - if ident != nil && !ident.Handle.IsInvalidHandle() { - mentions = append(mentions, ident.DID) - } - } rp.notifier.NewIssue(r.Context(), issue, mentions) rp.pages.HxLocation(w, fmt.Sprintf("/%s/issues/%d", f.OwnerSlashRepo(), issue.IssueId)) return diff --git a/appview/models/reference.go b/appview/models/reference.go new file mode 100644 index 00000000..4c871825 --- /dev/null +++ b/appview/models/reference.go @@ -0,0 +1,18 @@ 
+package models + +type RefKind int + +const ( + RefKindIssue RefKind = iota + RefKindPull +) + +// /@alice.com/cool-proj/issues/123 +// /@alice.com/cool-proj/issues/123#comment-321 +type ReferenceLink struct { + Handle string + Repo string + Kind RefKind + SubjectId int + CommentId *int +} diff --git a/appview/pages/markup/markdown.go b/appview/pages/markup/markdown.go index 4f290e44..81bea562 100644 --- a/appview/pages/markup/markdown.go +++ b/appview/pages/markup/markdown.go @@ -302,30 +302,6 @@ func (rctx *RenderContext) actualPath(dst string) string { return path.Join(rctx.CurrentDir, dst) } -// FindUserMentions returns Set of user handles from given markup soruce. -// It doesn't guarntee unique DIDs -func FindUserMentions(source string) []string { - var ( - mentions []string - mentionsSet = make(map[string]struct{}) - md = NewMarkdown() - sourceBytes = []byte(source) - root = md.Parser().Parse(text.NewReader(sourceBytes)) - ) - ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) { - if entering && n.Kind() == textension.KindAt { - handle := n.(*textension.AtNode).Handle - mentionsSet[handle] = struct{}{} - return ast.WalkSkipChildren, nil - } - return ast.WalkContinue, nil - }) - for handle := range mentionsSet { - mentions = append(mentions, handle) - } - return mentions -} - func isAbsoluteUrl(link string) bool { parsed, err := url.Parse(link) if err != nil { diff --git a/appview/pages/markup/reference_link.go b/appview/pages/markup/reference_link.go new file mode 100644 index 00000000..5b59510e --- /dev/null +++ b/appview/pages/markup/reference_link.go @@ -0,0 +1,124 @@ +package markup + +import ( + "maps" + "net/url" + "path" + "slices" + "strconv" + "strings" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "tangled.org/core/appview/models" + textension "tangled.org/core/appview/pages/markup/extension" +) + +// FindReferences collects all links referencing tangled-related objects +// like issues, PRs, comments or 
even @-mentions +// This function doesn't actually check for the existence of records in the DB +// or the PDS; it merely returns a list of what are presumed to be references. +func FindReferences(baseUrl string, source string) ([]string, []models.ReferenceLink) { + var ( + refLinkSet = make(map[models.ReferenceLink]struct{}) + mentionsSet = make(map[string]struct{}) + md = NewMarkdown() + sourceBytes = []byte(source) + root = md.Parser().Parse(text.NewReader(sourceBytes)) + ) + // trim url scheme. the SSL shouldn't matter + baseUrl = strings.TrimPrefix(baseUrl, "https://") + baseUrl = strings.TrimPrefix(baseUrl, "http://") + + ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + switch n.Kind() { + case textension.KindAt: + handle := n.(*textension.AtNode).Handle + mentionsSet[handle] = struct{}{} + return ast.WalkSkipChildren, nil + case ast.KindLink: + dest := string(n.(*ast.Link).Destination) + ref := parseTangledLink(baseUrl, dest) + if ref != nil { + refLinkSet[*ref] = struct{}{} + } + return ast.WalkSkipChildren, nil + case ast.KindAutoLink: + an := n.(*ast.AutoLink) + if an.AutoLinkType == ast.AutoLinkURL { + dest := string(an.URL(sourceBytes)) + ref := parseTangledLink(baseUrl, dest) + if ref != nil { + refLinkSet[*ref] = struct{}{} + } + } + return ast.WalkSkipChildren, nil + } + return ast.WalkContinue, nil + }) + mentions := slices.Collect(maps.Keys(mentionsSet)) + references := slices.Collect(maps.Keys(refLinkSet)) + return mentions, references +} + +func parseTangledLink(baseHost string, urlStr string) *models.ReferenceLink { + u, err := url.Parse(urlStr) + if err != nil { + return nil + } + + if u.Host != "" && !strings.EqualFold(u.Host, baseHost) { + return nil + } + + p := path.Clean(u.Path) + parts := strings.FieldsFunc(p, func(r rune) bool { return r == '/' }) + if len(parts) < 4 { + // need at least: handle / repo / kind / id + return nil + } + + var ( + handle = parts[0] 
+ repo = parts[1] + kindSeg = parts[2] + subjectSeg = parts[3] + ) + + handle = strings.TrimPrefix(handle, "@") + + var kind models.RefKind + switch kindSeg { + case "issues": + kind = models.RefKindIssue + case "pulls": + kind = models.RefKindPull + default: + return nil + } + + subjectId, err := strconv.Atoi(subjectSeg) + if err != nil { + return nil + } + var commentId *int + if u.Fragment != "" { + if strings.HasPrefix(u.Fragment, "comment-") { + commentIdStr := u.Fragment[len("comment-"):] + if id, err := strconv.Atoi(commentIdStr); err == nil { + commentId = &id + } + } + } + + return &models.ReferenceLink{ + Handle: handle, + Repo: repo, + Kind: kind, + SubjectId: subjectId, + CommentId: commentId, + } +} diff --git a/appview/pages/markup/reference_link_test.go b/appview/pages/markup/reference_link_test.go new file mode 100644 index 00000000..6712973c --- /dev/null +++ b/appview/pages/markup/reference_link_test.go @@ -0,0 +1,42 @@ +package markup_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "tangled.org/core/appview/models" + "tangled.org/core/appview/pages/markup" +) + +func TestMarkupParsing(t *testing.T) { + tests := []struct { + name string + source string + wantHandles []string + wantRefLinks []models.ReferenceLink + }{ + { + name: "normal link", + source: `[link](http://127.0.0.1:3000/alice.pds.tngl.boltless.dev/coolproj/issues/1)`, + wantHandles: make([]string, 0), + wantRefLinks: []models.ReferenceLink{ + {Handle: "alice.pds.tngl.boltless.dev", Repo: "coolproj", Kind: models.RefKindIssue, SubjectId: 1, CommentId: nil}, + }, + }, + { + name: "commonmark style autolink", + source: `<http://127.0.0.1:3000/alice.pds.tngl.boltless.dev/coolproj/issues/1>`, + wantHandles: make([]string, 0), + wantRefLinks: []models.ReferenceLink{ + {Handle: "alice.pds.tngl.boltless.dev", Repo: "coolproj", Kind: models.RefKindIssue, SubjectId: 1, CommentId: nil}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + handles, refLinks := markup.FindReferences("http://127.0.0.1:3000", 
tt.source) + assert.ElementsMatch(t, tt.wantHandles, handles) + assert.ElementsMatch(t, tt.wantRefLinks, refLinks) + }) + } +} diff --git a/appview/pulls/pulls.go b/appview/pulls/pulls.go index 572ceaa7..50d4728a 100644 --- a/appview/pulls/pulls.go +++ b/appview/pulls/pulls.go @@ -23,6 +23,7 @@ import ( "tangled.org/core/appview/oauth" "tangled.org/core/appview/pages" "tangled.org/core/appview/pages/markup" + "tangled.org/core/appview/refresolver" "tangled.org/core/appview/reporesolver" "tangled.org/core/appview/validator" "tangled.org/core/appview/xrpcclient" @@ -45,6 +46,7 @@ type Pulls struct { repoResolver *reporesolver.RepoResolver pages *pages.Pages idResolver *idresolver.Resolver + refResolver *refresolver.Resolver db *db.DB config *config.Config notifier notify.Notifier @@ -59,6 +61,7 @@ func New( repoResolver *reporesolver.RepoResolver, pages *pages.Pages, resolver *idresolver.Resolver, + refResolver *refresolver.Resolver, db *db.DB, config *config.Config, notifier notify.Notifier, @@ -72,6 +75,7 @@ func New( repoResolver: repoResolver, pages: pages, idResolver: resolver, + refResolver: refResolver, db: db, config: config, notifier: notifier, @@ -691,7 +695,6 @@ func (s *Pulls) RepoPulls(w http.ResponseWriter, r *http.Request) { } func (s *Pulls) PullComment(w http.ResponseWriter, r *http.Request) { - l := s.logger.With("handler", "PullComment") user := s.oauth.GetUser(r) f, err := s.repoResolver.Resolve(r) if err != nil { @@ -730,6 +733,8 @@ func (s *Pulls) PullComment(w http.ResponseWriter, r *http.Request) { return } + mentions, _ := s.refResolver.Resolve(r.Context(), body) + // Start a transaction tx, err := s.db.BeginTx(r.Context(), nil) if err != nil { @@ -789,15 +794,6 @@ func (s *Pulls) PullComment(w http.ResponseWriter, r *http.Request) { return } - rawMentions := markup.FindUserMentions(comment.Body) - idents := s.idResolver.ResolveIdents(r.Context(), rawMentions) - l.Debug("parsed mentions", "raw", rawMentions, "idents", idents) - var mentions 
[]syntax.DID - for _, ident := range idents { - if ident != nil && !ident.Handle.IsInvalidHandle() { - mentions = append(mentions, ident.DID) - } - } s.notifier.NewPullComment(r.Context(), comment, mentions) s.pages.HxLocation(w, fmt.Sprintf("/%s/pulls/%d#comment-%d", f.OwnerSlashRepo(), pull.PullId, commentId)) diff --git a/appview/refresolver/resolver.go b/appview/refresolver/resolver.go new file mode 100644 index 00000000..329a117c --- /dev/null +++ b/appview/refresolver/resolver.go @@ -0,0 +1,65 @@ +package refresolver + +import ( + "context" + "log/slog" + + "github.com/bluesky-social/indigo/atproto/syntax" + "tangled.org/core/appview/config" + "tangled.org/core/appview/db" + "tangled.org/core/appview/models" + "tangled.org/core/appview/pages/markup" + "tangled.org/core/idresolver" +) + +type Resolver struct { + config *config.Config + idResolver *idresolver.Resolver + execer db.Execer + logger *slog.Logger +} + +func New( + config *config.Config, + idResolver *idresolver.Resolver, + execer db.Execer, + logger *slog.Logger, +) *Resolver { + return &Resolver{ + config, + idResolver, + execer, + logger, + } +} + +func (r *Resolver) Resolve(ctx context.Context, source string) ([]syntax.DID, []syntax.ATURI) { + l := r.logger.With("method", "find_references") + rawMentions, rawRefs := markup.FindReferences(r.config.Core.AppviewHost, source) + l.Debug("found possible references", "mentions", rawMentions, "refs", rawRefs) + idents := r.idResolver.ResolveIdents(ctx, rawMentions) + var mentions []syntax.DID + for _, ident := range idents { + if ident != nil && !ident.Handle.IsInvalidHandle() { + mentions = append(mentions, ident.DID) + } + } + l.Debug("found mentions", "mentions", mentions) + + var resolvedRefs []models.ReferenceLink + for _, rawRef := range rawRefs { + ident, err := r.idResolver.ResolveIdent(ctx, rawRef.Handle) + if err != nil || ident == nil || ident.Handle.IsInvalidHandle() { + continue + } + rawRef.Handle = string(ident.DID) + resolvedRefs = 
append(resolvedRefs, rawRef) + } + aturiRefs, err := db.FindReferences(r.execer, resolvedRefs) + if err != nil { + l.Error("failed running query", "err", err) + } + l.Debug("found references", "refs", aturiRefs) + + return mentions, aturiRefs +} diff --git a/appview/state/router.go b/appview/state/router.go index 84f47217..f2f8780c 100644 --- a/appview/state/router.go +++ b/appview/state/router.go @@ -260,6 +260,7 @@ func (s *State) IssuesRouter(mw *middleware.Middleware) http.Handler { s.repoResolver, s.pages, s.idResolver, + s.refResolver, s.db, s.config, s.notifier, @@ -276,6 +277,7 @@ func (s *State) PullsRouter(mw *middleware.Middleware) http.Handler { s.repoResolver, s.pages, s.idResolver, + s.refResolver, s.db, s.config, s.notifier, diff --git a/appview/state/state.go b/appview/state/state.go index ce632747..9f6a8074 100644 --- a/appview/state/state.go +++ b/appview/state/state.go @@ -21,6 +21,7 @@ import ( phnotify "tangled.org/core/appview/notify/posthog" "tangled.org/core/appview/oauth" "tangled.org/core/appview/pages" + "tangled.org/core/appview/refresolver" "tangled.org/core/appview/reporesolver" "tangled.org/core/appview/validator" xrpcclient "tangled.org/core/appview/xrpcclient" @@ -49,6 +50,7 @@ type State struct { enforcer *rbac.Enforcer pages *pages.Pages idResolver *idresolver.Resolver + refResolver *refresolver.Resolver posthog posthog.Client jc *jetstream.JetstreamClient config *config.Config @@ -98,6 +100,8 @@ func Make(ctx context.Context, config *config.Config) (*State, error) { repoResolver := reporesolver.New(config, enforcer, res, d) + refResolver := refresolver.New(config, res, d, log.SubLogger(logger, "refResolver")) + wrapper := db.DbWrapper{Execer: d} jc, err := jetstream.NewJetstreamClient( config.Jetstream.Endpoint, @@ -178,6 +182,7 @@ func Make(ctx context.Context, config *config.Config) (*State, error) { enforcer, pages, res, + refResolver, posthog, jc, config, -- 2.43.0