A set of utilities for working with the AT Protocol in Elixir.

feat: mix task for generating lexicon files from JSON

ovyerus.com bdc2040f 010cbea4

verified
Changed files
+111 -394
lib
atex
lexicon
validators
atproto
sh
comet
mix
priv
templates
+2
CHANGELOG.md
···
- `Atex.Lexicon` module that provides the `deflexicon` macro, which takes in a
JSON Lexicon definition and converts it into a series of schemas for each
definition within it.
+
- `mix atex.lexicons` for converting lexicon JSON files into modules using
+
`deflexicon` easily.
## [0.3.0] - 2025-06-29
+1 -1
README.md
···
- [x] XRPC client
- [x] DID & handle resolution service with a cache
- [x] Macro for converting a Lexicon definition into a runtime-validation schema
-
- [ ] Codegen to convert a directory of lexicons
+
- [x] Codegen to convert a directory of lexicons
- [ ] Extended XRPC client with support for validated inputs/outputs
- [ ] OAuth stuff
+1 -1
lib/atex/lexicon.ex
···
def_to_schema(nsid, def_name, record)
end
-
# TODO: add `$type` field. It's just a string though.
+
# TODO: need to spit out an extra 'branded' type with `$type` field, for use in union refs.
defp def_to_schema(
nsid,
def_name,
-2
lib/atex/lexicon/validators/integer.ex
···
defmodule Atex.Lexicon.Validators.Integer do
-
alias Atex.Lexicon.Validators
-
@type option() ::
{:minimum, integer()}
| {:maximum, integer()}
+8 -2
lib/atex/nsid.ex
···
# maybe stuff for fetching the repo that belongs to an authority
@spec to_atom(String.t()) :: atom()
-
def to_atom(nsid) do
+
def to_atom(nsid, fully_qualify \\ true) do
nsid
|> String.split(".")
|> Enum.map(&String.capitalize/1)
-
|> then(&["Elixir" | &1])
+
|> then(fn parts ->
+
if fully_qualify do
+
["Elixir" | parts]
+
else
+
parts
+
end
+
end)
|> Enum.join(".")
|> String.to_atom()
end
-115
lib/atproto/sh/comet/v0/actor/profile.ex
···
-
defmodule Sh.Comet.V0.Actor.Profile do
-
use Atex.Lexicon
-
-
deflexicon(%{
-
"defs" => %{
-
"main" => %{
-
"description" => "A user's Comet profile.",
-
"key" => "literal:self",
-
"record" => %{
-
"properties" => %{
-
"avatar" => %{
-
"accept" => ["image/png", "image/jpeg"],
-
"description" =>
-
"Small image to be displayed next to posts from account. AKA, 'profile picture'",
-
"maxSize" => 1_000_000,
-
"type" => "blob"
-
},
-
"banner" => %{
-
"accept" => ["image/png", "image/jpeg"],
-
"description" => "Larger horizontal image to display behind profile view.",
-
"maxSize" => 1_000_000,
-
"type" => "blob"
-
},
-
"createdAt" => %{"format" => "datetime", "type" => "string"},
-
"description" => %{
-
"description" => "Free-form profile description text.",
-
"maxGraphemes" => 256,
-
"maxLength" => 2560,
-
"type" => "string"
-
},
-
"descriptionFacets" => %{
-
"description" => "Annotations of the user's description.",
-
"ref" => "sh.comet.v0.richtext.facet",
-
"type" => "ref"
-
},
-
"displayName" => %{
-
"maxGraphemes" => 64,
-
"maxLength" => 640,
-
"type" => "string"
-
},
-
"featuredItems" => %{
-
"description" => "Pinned items to be shown first on the user's profile.",
-
"items" => %{"format" => "at-uri", "type" => "string"},
-
"maxLength" => 5,
-
"type" => "array"
-
}
-
},
-
"type" => "object"
-
},
-
"type" => "record"
-
},
-
"view" => %{
-
"properties" => %{
-
"avatar" => %{"format" => "uri", "type" => "string"},
-
"createdAt" => %{"format" => "datetime", "type" => "string"},
-
"did" => %{"format" => "did", "type" => "string"},
-
"displayName" => %{
-
"maxGraphemes" => 64,
-
"maxLength" => 640,
-
"type" => "string"
-
},
-
"handle" => %{"format" => "handle", "type" => "string"},
-
"indexedAt" => %{"format" => "datetime", "type" => "string"},
-
"viewer" => %{"ref" => "#viewerState", "type" => "ref"}
-
},
-
"required" => ["did", "handle"],
-
"type" => "object"
-
},
-
"viewFull" => %{
-
"properties" => %{
-
"avatar" => %{"format" => "uri", "type" => "string"},
-
"banner" => %{"format" => "uri", "type" => "string"},
-
"createdAt" => %{"format" => "datetime", "type" => "string"},
-
"description" => %{
-
"maxGraphemes" => 256,
-
"maxLength" => 2560,
-
"type" => "string"
-
},
-
"descriptionFacets" => %{
-
"ref" => "sh.comet.v0.richtext.facet",
-
"type" => "ref"
-
},
-
"did" => %{"format" => "did", "type" => "string"},
-
"displayName" => %{
-
"maxGraphemes" => 64,
-
"maxLength" => 640,
-
"type" => "string"
-
},
-
"featuredItems" => %{
-
"items" => %{"format" => "at-uri", "type" => "string"},
-
"maxLength" => 5,
-
"type" => "array"
-
},
-
"followersCount" => %{"type" => "integer"},
-
"followsCount" => %{"type" => "integer"},
-
"handle" => %{"format" => "handle", "type" => "string"},
-
"indexedAt" => %{"format" => "datetime", "type" => "string"},
-
"playlistsCount" => %{"type" => "integer"},
-
"tracksCount" => %{"type" => "integer"},
-
"viewer" => %{"ref" => "#viewerState", "type" => "ref"}
-
},
-
"required" => ["did", "handle"],
-
"type" => "object"
-
},
-
"viewerState" => %{
-
"description" =>
-
"Metadata about the requesting account's relationship with the user. TODO: determine if we create our own graph or inherit bsky's.",
-
"properties" => %{},
-
"type" => "object"
-
}
-
},
-
"id" => "sh.comet.v0.actor.profile",
-
"lexicon" => 1
-
})
-
end
-44
lib/atproto/sh/comet/v0/feed/defs.ex
···
-
defmodule Sh.Comet.V0.Feed.Defs do
-
use Atex.Lexicon
-
-
deflexicon(%{
-
"defs" => %{
-
"buyLink" => %{
-
"description" => "Indicate the link leads to a purchase page for the track.",
-
"type" => "token"
-
},
-
"downloadLink" => %{
-
"description" => "Indicate the link leads to a free download for the track.",
-
"type" => "token"
-
},
-
"link" => %{
-
"description" =>
-
"Link for the track. Usually to acquire it in some way, e.g. via free download or purchase. | TODO: multiple links?",
-
"properties" => %{
-
"type" => %{
-
"knownValues" => [
-
"sh.comet.v0.feed.defs#downloadLink",
-
"sh.comet.v0.feed.defs#buyLink"
-
],
-
"type" => "string"
-
},
-
"value" => %{"format" => "uri", "type" => "string"}
-
},
-
"required" => ["type", "value"],
-
"type" => "object"
-
},
-
"viewerState" => %{
-
"description" =>
-
"Metadata about the requesting account's relationship with the subject content. Only has meaningful content for authed requests.",
-
"properties" => %{
-
"featured" => %{"type" => "boolean"},
-
"like" => %{"format" => "at-uri", "type" => "string"},
-
"repost" => %{"format" => "at-uri", "type" => "string"}
-
},
-
"type" => "object"
-
}
-
},
-
"id" => "sh.comet.v0.feed.defs",
-
"lexicon" => 1
-
})
-
end
-45
lib/atproto/sh/comet/v0/feed/getActorTracks.ex
···
-
defmodule Sh.Comet.V0.Feed.GetActorTracks do
-
use Atex.Lexicon
-
-
deflexicon(%{
-
"defs" => %{
-
"main" => %{
-
"description" => "Get a list of an actor's tracks.",
-
"output" => %{
-
"encoding" => "application/json",
-
"schema" => %{
-
"properties" => %{
-
"cursor" => %{"type" => "string"},
-
"tracks" => %{
-
"items" => %{
-
"ref" => "sh.comet.v0.feed.track#view",
-
"type" => "ref"
-
},
-
"type" => "array"
-
}
-
},
-
"required" => ["tracks"],
-
"type" => "object"
-
}
-
},
-
"parameters" => %{
-
"properties" => %{
-
"actor" => %{"format" => "at-identifier", "type" => "string"},
-
"cursor" => %{"type" => "string"},
-
"limit" => %{
-
"default" => 50,
-
"maximum" => 100,
-
"minimum" => 1,
-
"type" => "integer"
-
}
-
},
-
"required" => ["actor"],
-
"type" => "params"
-
},
-
"type" => "query"
-
}
-
},
-
"id" => "sh.comet.v0.feed.getActorTracks",
-
"lexicon" => 1
-
})
-
end
-114
lib/atproto/sh/comet/v0/feed/track.ex
···
-
defmodule Sh.Comet.V0.Feed.Track do
-
use Atex.Lexicon
-
-
deflexicon(%{
-
"defs" => %{
-
"main" => %{
-
"description" =>
-
"A Comet audio track. TODO: should probably have some sort of pre-calculated waveform, or have a query to get one from a blob?",
-
"key" => "tid",
-
"record" => %{
-
"properties" => %{
-
"audio" => %{
-
"accept" => ["audio/ogg"],
-
"description" =>
-
"Audio of the track, ideally encoded as 96k Opus. Limited to 100mb.",
-
"maxSize" => 100_000_000,
-
"type" => "blob"
-
},
-
"createdAt" => %{
-
"description" => "Timestamp for when the track entry was originally created.",
-
"format" => "datetime",
-
"type" => "string"
-
},
-
"description" => %{
-
"description" => "Description of the track.",
-
"maxGraphemes" => 2000,
-
"maxLength" => 20000,
-
"type" => "string"
-
},
-
"descriptionFacets" => %{
-
"description" => "Annotations of the track's description.",
-
"ref" => "sh.comet.v0.richtext.facet",
-
"type" => "ref"
-
},
-
"explicit" => %{
-
"description" =>
-
"Whether the track contains explicit content that may objectionable to some people, usually swearing or adult themes.",
-
"type" => "boolean"
-
},
-
"image" => %{
-
"accept" => ["image/png", "image/jpeg"],
-
"description" => "Image to be displayed representing the track.",
-
"maxSize" => 1_000_000,
-
"type" => "blob"
-
},
-
"link" => %{"ref" => "sh.comet.v0.feed.defs#link", "type" => "ref"},
-
"releasedAt" => %{
-
"description" =>
-
"Timestamp for when the track was released. If in the future, may be used to implement pre-savable tracks.",
-
"format" => "datetime",
-
"type" => "string"
-
},
-
"tags" => %{
-
"description" => "Hashtags for the track, usually for genres.",
-
"items" => %{
-
"maxGraphemes" => 64,
-
"maxLength" => 640,
-
"type" => "string"
-
},
-
"maxLength" => 8,
-
"type" => "array"
-
},
-
"title" => %{
-
"description" =>
-
"Title of the track. Usually shouldn't include the creator's name.",
-
"maxGraphemes" => 256,
-
"maxLength" => 2560,
-
"minLength" => 1,
-
"type" => "string"
-
}
-
},
-
"required" => ["audio", "title", "createdAt"],
-
"type" => "object"
-
},
-
"type" => "record"
-
},
-
"view" => %{
-
"properties" => %{
-
"audio" => %{
-
"description" =>
-
"URL pointing to where the audio data for the track can be fetched. May be re-encoded from the original blob.",
-
"format" => "uri",
-
"type" => "string"
-
},
-
"author" => %{
-
"ref" => "sh.comet.v0.actor.profile#viewFull",
-
"type" => "ref"
-
},
-
"cid" => %{"format" => "cid", "type" => "string"},
-
"commentCount" => %{"type" => "integer"},
-
"image" => %{
-
"description" => "URL pointing to where the image for the track can be fetched.",
-
"format" => "uri",
-
"type" => "string"
-
},
-
"indexedAt" => %{"format" => "datetime", "type" => "string"},
-
"likeCount" => %{"type" => "integer"},
-
"playCount" => %{"type" => "integer"},
-
"record" => %{"ref" => "#main", "type" => "ref"},
-
"repostCount" => %{"type" => "integer"},
-
"uri" => %{"format" => "at-uri", "type" => "string"},
-
"viewer" => %{
-
"ref" => "sh.comet.v0.feed.defs#viewerState",
-
"type" => "ref"
-
}
-
},
-
"required" => ["uri", "cid", "author", "audio", "record", "indexedAt"],
-
"type" => "object"
-
}
-
},
-
"id" => "sh.comet.v0.feed.track",
-
"lexicon" => 1
-
})
-
end
-70
lib/atproto/sh/comet/v0/richtext/facet.ex
···
-
defmodule Sh.Comet.V0.Richtext.Facet do
-
use Atex.Lexicon
-
-
deflexicon(%{
-
"defs" => %{
-
"byteSlice" => %{
-
"description" =>
-
"Specifies the sub-string range a facet feature applies to. Start index is inclusive, end index is exclusive. Indices are zero-indexed, counting bytes of the UTF-8 encoded text. NOTE: some languages, like Javascript, use UTF-16 or Unicode codepoints for string slice indexing; in these languages, convert to byte arrays before working with facets.",
-
"properties" => %{
-
"byteEnd" => %{"minimum" => 0, "type" => "integer"},
-
"byteStart" => %{"minimum" => 0, "type" => "integer"}
-
},
-
"required" => ["byteStart", "byteEnd"],
-
"type" => "object"
-
},
-
"link" => %{
-
"description" =>
-
"Facet feature for a URL. The text URL may have been simplified or truncated, but the facet reference should be a complete URL.",
-
"properties" => %{"uri" => %{"format" => "uri", "type" => "string"}},
-
"required" => ["uri"],
-
"type" => "object"
-
},
-
"main" => %{
-
"description" => "Annotation of a sub-string within rich text.",
-
"properties" => %{
-
"features" => %{
-
"items" => %{
-
"refs" => ["#mention", "#link", "#tag"],
-
"type" => "union"
-
},
-
"type" => "array"
-
},
-
"index" => %{"ref" => "#byteSlice", "type" => "ref"}
-
},
-
"required" => ["index", "features"],
-
"type" => "object"
-
},
-
"mention" => %{
-
"description" =>
-
"Facet feature for mention of another account. The text is usually a handle, including a '@' prefix, but the facet reference is a DID.",
-
"properties" => %{"did" => %{"format" => "did", "type" => "string"}},
-
"required" => ["did"],
-
"type" => "object"
-
},
-
"tag" => %{
-
"description" =>
-
"Facet feature for a hashtag. The text usually includes a '#' prefix, but the facet reference should not (except in the case of 'double hash tags').",
-
"properties" => %{
-
"tag" => %{"maxGraphemes" => 64, "maxLength" => 640, "type" => "string"}
-
},
-
"required" => ["tag"],
-
"type" => "object"
-
},
-
"timestamp" => %{
-
"description" =>
-
"Facet feature for a timestamp in a track. The text usually is in the format of 'hh:mm:ss' with the hour section being omitted if unnecessary.",
-
"properties" => %{
-
"timestamp" => %{
-
"description" => "Reference time, in seconds.",
-
"minimum" => 0,
-
"type" => "integer"
-
}
-
},
-
"type" => "object"
-
}
-
},
-
"id" => "sh.comet.v0.richtext.facet",
-
"lexicon" => 1
-
})
-
end
+94
lib/mix/tasks/atex.lexicons.ex
···
+
defmodule Mix.Tasks.Atex.Lexicons do
+
@moduledoc """
+
Generate Elixir modules from AT Protocol lexicons, which can then be used to
+
validate data at runtime.
+
+
AT Protocol lexicons are JSON files that define parts of the AT Protocol data
+
model. This task processes these lexicon files and generates corresponding
+
Elixir modules.
+
+
## Usage
+
+
mix atex.lexicons [OPTIONS] [PATHS]
+
+
## Arguments
+
+
- `PATHS` - List of lexicon files to process. Also supports standard glob
+
syntax for reading many lexicons at once.
+
+
## Options
+
+
- `-o`/`--output` - Output directory for generated modules (default:
+
`lib/atproto`)
+
+
## Examples
+
+
Process all JSON files in the lexicons directory:
+
+
mix atex.lexicons lexicons/**/*.json
+
+
Process specific lexicon files:
+
+
mix atex.lexicons lexicons/com/atproto/repo/*.json lexicons/app/bsky/actor/profile.json
+
+
Generate modules to a custom output directory:
+
+
mix atex.lexicons lexicons/**/*.json --output lib/my_atproto
+
"""
+
@shortdoc "Generate Elixir modules from AT Protocol lexicons."
+
+
use Mix.Task
+
require EEx
+
+
@switches [output: :string]
+
@aliases [o: :output]
+
@template_path Path.expand("../../../priv/templates/lexicon.eex", __DIR__)
+
+
@impl Mix.Task
+
def run(args) do
+
{options, globs} = OptionParser.parse!(args, switches: @switches, aliases: @aliases)
+
+
output = Keyword.get(options, :output, "lib/atproto")
+
paths = Enum.flat_map(globs, &Path.wildcard/1)
+
+
if length(paths) == 0 do
+
Mix.shell().error("No valid search paths have been provided, aborting.")
+
else
+
Mix.shell().info("Generating modules for lexicons into #{output}")
+
+
Enum.each(paths, fn path ->
+
Mix.shell().info("- #{path}")
+
generate(path, output)
+
end)
+
end
+
end
+
+
# TODO: validate schema?
+
defp generate(input, output) do
+
lexicon =
+
input
+
|> File.read!()
+
|> JSON.decode!()
+
+
if not is_binary(lexicon["id"]) do
+
raise ArgumentError, message: "Malformed lexicon: does not have an `id` field."
+
end
+
+
code = lexicon |> template() |> Code.format_string!() |> Enum.join("")
+
+
file_path =
+
lexicon["id"]
+
|> String.split(".")
+
|> Enum.join("/")
+
|> then(&(&1 <> ".ex"))
+
|> then(&Path.join(output, &1))
+
+
file_path
+
|> Path.dirname()
+
|> File.mkdir_p!()
+
+
File.write!(file_path, code)
+
end
+
+
EEx.function_from_file(:defp, :template, @template_path, [:lexicon])
+
end
+5
priv/templates/lexicon.eex
···
+
defmodule <%= Atex.NSID.to_atom(lexicon["id"], false) %> do
+
use Atex.Lexicon
+
+
deflexicon(<%= inspect(lexicon, limit: :infinity, pretty: true, printable_limit: :infinity) %>)
+
end