forked from tangled.org/core
this repo has no description

spindle: rework logging

Docker always writes logs to disk; they are then streamed on demand via the
Logs endpoint.

Signed-off-by: oppiliappan <me@oppi.li>

oppi.li 9c251ff8 944d0f1a

verified
Changed files
+98 -269
spindle
+8 -117
spindle/engine/engine.go
···
package engine
import (
-
"bufio"
"context"
"errors"
"fmt"
···
n *notifier.Notifier
cfg *config.Config
-
chanMu sync.RWMutex
-
stdoutChans map[string]chan string
-
stderrChans map[string]chan string
-
cleanupMu sync.Mutex
cleanup map[string][]cleanupFunc
}
···
n: n,
cfg: cfg,
}
-
-
e.stdoutChans = make(map[string]chan string, 100)
-
e.stderrChans = make(map[string]chan string, 100)
e.cleanup = make(map[string][]cleanupFunc)
···
}
e.l.Info("using step timeout", "timeout", stepTimeout)
-
e.chanMu.Lock()
-
if _, exists := e.stdoutChans[wid.String()]; !exists {
-
e.stdoutChans[wid.String()] = make(chan string, 100)
-
}
-
if _, exists := e.stderrChans[wid.String()]; !exists {
-
e.stderrChans[wid.String()] = make(chan string, 100)
-
}
-
e.chanMu.Unlock()
-
-
// close channels after all steps are complete
-
defer func() {
-
close(e.stdoutChans[wid.String()])
-
close(e.stderrChans[wid.String()])
-
}()
-
for stepIdx, step := range steps {
envs := ConstructEnvs(step.Environment)
envs.AddEnv("HOME", workspaceDir)
···
if state.ExitCode != 0 {
e.l.Error("workflow failed!", "workflow_id", wid.String(), "error", state.Error, "exit_code", state.ExitCode, "oom_killed", state.OOMKilled)
-
err := e.db.StatusFailed(wid, state.Error, int64(state.ExitCode), e.n)
-
if err != nil {
-
return err
-
}
return fmt.Errorf("error: %s, exit code: %d, oom: %t", state.Error, state.ExitCode, state.OOMKilled)
}
}
···
Follow: true,
ShowStdout: true,
ShowStderr: true,
-
Details: true,
+
Details: false,
Timestamps: false,
})
if err != nil {
return err
}
-
stepLogger, err := NewStepLogger(e.cfg.Pipelines.LogDir, wid.String(), stepIdx)
+
wfLogger, err := NewWorkflowLogger(e.cfg.Pipelines.LogDir, wid)
if err != nil {
e.l.Warn("failed to setup step logger; logs will not be persisted", "error", err)
+
return err
}
-
-
var logOutput io.Writer = io.Discard
+
defer wfLogger.Close()
-
if e.cfg.Server.Dev {
-
logOutput = &ansiStrippingWriter{underlying: os.Stdout}
+
_, err = stdcopy.StdCopy(wfLogger.Stdout(), wfLogger.Stderr(), logs)
+
if err != nil && err != io.EOF && !errors.Is(err, context.DeadlineExceeded) {
+
return fmt.Errorf("failed to copy logs: %w", err)
}
-
tee := io.TeeReader(logs, logOutput)
-
-
// using StdCopy we demux logs and stream stdout and stderr to different
-
// channels.
-
//
-
// stdout w||r stdoutCh
-
// stderr w||r stderrCh
-
//
-
-
rpipeOut, wpipeOut := io.Pipe()
-
rpipeErr, wpipeErr := io.Pipe()
-
-
// sets up a io.MultiWriter to write to both the pipe
-
// and the file-based logger.
-
multiOut := io.MultiWriter(wpipeOut, stepLogger.Stdout())
-
multiErr := io.MultiWriter(wpipeErr, stepLogger.Stderr())
-
-
wg := sync.WaitGroup{}
-
-
wg.Add(1)
-
go func() {
-
defer wg.Done()
-
defer wpipeOut.Close()
-
defer wpipeErr.Close()
-
defer stepLogger.Close()
-
_, err := stdcopy.StdCopy(multiOut, multiErr, tee)
-
if err != nil && err != io.EOF && !errors.Is(context.DeadlineExceeded, err) {
-
e.l.Error("failed to copy logs", "error", err)
-
}
-
}()
-
-
// read from stdout and send to stdout pipe
-
// NOTE: the stdoutCh channnel is closed further up in StartSteps
-
// once all steps are done.
-
wg.Add(1)
-
go func() {
-
defer wg.Done()
-
e.chanMu.RLock()
-
stdoutCh := e.stdoutChans[wid.String()]
-
e.chanMu.RUnlock()
-
-
scanner := bufio.NewScanner(rpipeOut)
-
for scanner.Scan() {
-
stdoutCh <- scanner.Text()
-
}
-
if err := scanner.Err(); err != nil {
-
e.l.Error("failed to scan stdout", "error", err)
-
}
-
}()
-
-
// read from stderr and send to stderr pipe
-
// NOTE: the stderrCh channnel is closed further up in StartSteps
-
// once all steps are done.
-
wg.Add(1)
-
go func() {
-
defer wg.Done()
-
e.chanMu.RLock()
-
stderrCh := e.stderrChans[wid.String()]
-
e.chanMu.RUnlock()
-
-
scanner := bufio.NewScanner(rpipeErr)
-
for scanner.Scan() {
-
stderrCh <- scanner.Text()
-
}
-
if err := scanner.Err(); err != nil {
-
e.l.Error("failed to scan stderr", "error", err)
-
}
-
}()
-
-
wg.Wait()
-
return nil
}
···
}
}
return nil
-
}
-
-
func (e *Engine) LogChannels(wid models.WorkflowId) (stdout <-chan string, stderr <-chan string, ok bool) {
-
e.chanMu.RLock()
-
defer e.chanMu.RUnlock()
-
-
stdoutCh, ok1 := e.stdoutChans[wid.String()]
-
stderrCh, ok2 := e.stderrChans[wid.String()]
-
-
if !ok1 || !ok2 {
-
return nil, nil, false
-
}
-
return stdoutCh, stderrCh, true
}
func (e *Engine) registerCleanup(wid models.WorkflowId, fn cleanupFunc) {
···
CapDrop: []string{"ALL"},
CapAdd: []string{"CAP_DAC_OVERRIDE"},
SecurityOpt: []string{"no-new-privileges"},
+
ExtraHosts: []string{"host.docker.internal:host-gateway"},
}
return hostConfig
+53 -37
spindle/engine/logger.go
···
package engine
import (
+
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
+
"strings"
+
+
"tangled.sh/tangled.sh/core/spindle/models"
)
-
type StepLogger struct {
-
stderr *os.File
-
stdout *os.File
+
type WorkflowLogger struct {
+
file *os.File
+
encoder *json.Encoder
}
-
func NewStepLogger(baseDir, workflowID string, stepIdx int) (*StepLogger, error) {
-
dir := filepath.Join(baseDir, workflowID)
+
func NewWorkflowLogger(baseDir string, wid models.WorkflowId) (*WorkflowLogger, error) {
+
dir := filepath.Join(baseDir, wid.String())
if err := os.MkdirAll(dir, 0755); err != nil {
return nil, fmt.Errorf("creating log dir: %w", err)
}
-
stdoutPath := logFilePath(baseDir, workflowID, "stdout", stepIdx)
-
stderrPath := logFilePath(baseDir, workflowID, "stderr", stepIdx)
+
path := LogFilePath(baseDir, wid)
-
stdoutFile, err := os.Create(stdoutPath)
+
file, err := os.Create(path)
if err != nil {
-
return nil, fmt.Errorf("creating stdout log file: %w", err)
+
return nil, fmt.Errorf("creating log file: %w", err)
}
-
stderrFile, err := os.Create(stderrPath)
+
return &WorkflowLogger{
+
file: file,
+
encoder: json.NewEncoder(file),
+
}, nil
+
}
+
+
func (l *WorkflowLogger) Write(p []byte) (n int, err error) {
+
return l.file.Write(p)
+
}
+
+
func (l *WorkflowLogger) Close() error {
+
return l.file.Close()
+
}
+
+
func OpenLogFile(baseDir string, workflowID models.WorkflowId) (*os.File, error) {
+
logPath := LogFilePath(baseDir, workflowID)
+
+
file, err := os.Open(logPath)
if err != nil {
-
stdoutFile.Close()
-
return nil, fmt.Errorf("creating stderr log file: %w", err)
+
return nil, fmt.Errorf("error opening log file: %w", err)
}
-
return &StepLogger{
-
stdout: stdoutFile,
-
stderr: stderrFile,
-
}, nil
+
return file, nil
}
-
func (l *StepLogger) Stdout() io.Writer {
-
return l.stdout
+
func LogFilePath(baseDir string, workflowID models.WorkflowId) string {
+
logFilePath := filepath.Join(baseDir, fmt.Sprintf("%s.log", workflowID.String()))
+
return logFilePath
+
}
+
+
func (l *WorkflowLogger) Stdout() io.Writer {
+
return &jsonWriter{logger: l, stream: "stdout"}
}
-
func (l *StepLogger) Stderr() io.Writer {
-
return l.stderr
+
func (l *WorkflowLogger) Stderr() io.Writer {
+
return &jsonWriter{logger: l, stream: "stderr"}
}
-
func (l *StepLogger) Close() error {
-
err1 := l.stdout.Close()
-
err2 := l.stderr.Close()
-
if err1 != nil {
-
return err1
-
}
-
return err2
+
type jsonWriter struct {
+
logger *WorkflowLogger
+
stream string
}
-
func ReadStepLog(baseDir, workflowID, stream string, stepIdx int) (string, error) {
-
logPath := logFilePath(baseDir, workflowID, stream, stepIdx)
+
func (w *jsonWriter) Write(p []byte) (int, error) {
+
line := strings.TrimRight(string(p), "\r\n")
-
data, err := os.ReadFile(logPath)
-
if err != nil {
-
return "", fmt.Errorf("error reading log file: %w", err)
+
entry := models.LogLine{
+
Stream: w.stream,
+
Data: line,
}
-
return string(data), nil
-
}
+
if err := w.logger.encoder.Encode(entry); err != nil {
+
return 0, err
+
}
-
func logFilePath(baseDir, workflowID, stream string, stepIdx int) string {
-
logFilePath := filepath.Join(baseDir, workflowID, fmt.Sprintf("%d-%s.log", stepIdx, stream))
-
return logFilePath
+
return len(p), nil
}
+5
spindle/models/models.go
···
func (s StatusKind) IsFinish() bool {
return slices.Contains(FinishStates[:], s)
}
+
+
type LogLine struct {
+
Stream string `json:"s"`
+
Data string `json:"d"`
+
}
+5
spindle/models/pipeline.go
···
swf.Image = workflowImage(twf.Dependencies, cfg.Pipelines.Nixery)
swf.addNixProfileToPath()
+
swf.enableNixFlakes()
setup := &setupSteps{}
setup.addStep(nixConfStep())
···
func (wf *Workflow) addNixProfileToPath() {
wf.Environment["PATH"] = "$PATH:/.nix-profile/bin"
}
+
+
func (wf *Workflow) enableNixFlakes() {
+
wf.Environment["NIX_CONFIG"] = "experimental-features = nix-command flakes"
+
}
-1
spindle/server.go
···
w.Write([]byte(s.cfg.Server.Owner))
})
mux.HandleFunc("/logs/{knot}/{rkey}/{name}", s.Logs)
-
mux.HandleFunc("/logs/{knot}/{rkey}/{name}/{idx}", s.StepLogs)
return mux
}
+27 -114
spindle/stream.go
···
package spindle
import (
-
"bufio"
"context"
"encoding/json"
"fmt"
"net/http"
"strconv"
-
"strings"
"time"
"tangled.sh/tangled.sh/core/spindle/engine"
···
"github.com/go-chi/chi/v5"
"github.com/gorilla/websocket"
+
"github.com/hpcloud/tail"
)
var upgrader = websocket.Upgrader{
···
return
}
-
s.handleLogStream(w, r, func(ctx context.Context, conn *websocket.Conn) error {
-
return s.streamLogs(ctx, conn, wid)
-
})
-
}
-
-
func (s *Spindle) StepLogs(w http.ResponseWriter, r *http.Request) {
-
wid, err := getWorkflowID(r)
-
if err != nil {
-
http.Error(w, err.Error(), http.StatusBadRequest)
-
return
-
}
-
-
idxStr := chi.URLParam(r, "idx")
-
if idxStr == "" {
-
http.Error(w, "step index required", http.StatusBadRequest)
-
return
-
}
-
idx, err := strconv.Atoi(idxStr)
-
if err != nil {
-
http.Error(w, "bad step index", http.StatusBadRequest)
-
return
-
}
-
-
s.handleLogStream(w, r, func(ctx context.Context, conn *websocket.Conn) error {
-
return s.streamLogFromDisk(ctx, conn, wid, idx)
-
})
-
}
-
-
func (s *Spindle) handleLogStream(w http.ResponseWriter, r *http.Request, streamFn func(ctx context.Context, conn *websocket.Conn) error) {
l := s.l.With("handler", "Logs")
+
l = s.l.With("wid", wid)
conn, err := upgrader.Upgrade(w, r, nil)
if err != nil {
···
}
}()
-
if err := streamFn(ctx, conn); err != nil {
+
if err := s.streamLogsFromDisk(ctx, conn, wid); err != nil {
l.Error("log stream failed", "err", err)
}
l.Debug("logs connection closed")
}
-
func (s *Spindle) streamLogs(ctx context.Context, conn *websocket.Conn, wid models.WorkflowId) error {
-
l := s.l.With("workflow_id", wid.String())
+
func (s *Spindle) streamLogsFromDisk(ctx context.Context, conn *websocket.Conn, wid models.WorkflowId) error {
+
filePath := engine.LogFilePath(s.cfg.Pipelines.LogDir, wid)
-
stdoutCh, stderrCh, ok := s.eng.LogChannels(wid)
-
if !ok {
-
return fmt.Errorf("workflow_id %q not found", wid.String())
+
config := tail.Config{
+
Follow: true,
+
ReOpen: true,
+
MustExist: false,
+
Location: &tail.SeekInfo{Offset: 0, Whence: 0},
+
Logger: tail.DiscardingLogger,
}
-
done := make(chan struct{})
+
t, err := tail.TailFile(filePath, config)
+
if err != nil {
+
return fmt.Errorf("failed to tail log file: %w", err)
+
}
+
defer t.Stop()
-
go func() {
-
for {
-
select {
-
case line, ok := <-stdoutCh:
-
if !ok {
-
done <- struct{}{}
-
return
-
}
-
msg := map[string]string{"type": "stdout", "data": line}
-
if err := conn.WriteJSON(msg); err != nil {
-
l.Error("write stdout failed", "err", err)
-
done <- struct{}{}
-
return
-
}
-
case <-ctx.Done():
-
done <- struct{}{}
-
return
+
for {
+
select {
+
case <-ctx.Done():
+
return ctx.Err()
+
case line := <-t.Lines:
+
if line == nil {
+
return fmt.Errorf("tail channel closed unexpectedly")
}
-
}
-
}()
-
go func() {
-
for {
-
select {
-
case line, ok := <-stderrCh:
-
if !ok {
-
done <- struct{}{}
-
return
-
}
-
msg := map[string]string{"type": "stderr", "data": line}
-
if err := conn.WriteJSON(msg); err != nil {
-
l.Error("write stderr failed", "err", err)
-
done <- struct{}{}
-
return
-
}
-
case <-ctx.Done():
-
done <- struct{}{}
-
return
+
if line.Err != nil {
+
return fmt.Errorf("error tailing log file: %w", line.Err)
}
-
}
-
}()
-
-
select {
-
case <-done:
-
case <-ctx.Done():
-
}
-
return nil
-
}
-
-
func (s *Spindle) streamLogFromDisk(ctx context.Context, conn *websocket.Conn, wid models.WorkflowId, stepIdx int) error {
-
streams := []string{"stdout", "stderr"}
-
-
for _, stream := range streams {
-
data, err := engine.ReadStepLog(s.cfg.Pipelines.LogDir, wid.String(), stream, stepIdx)
-
if err != nil {
-
// log but continue to next stream
-
s.l.Error("failed to read step log", "stream", stream, "step", stepIdx, "wid", wid.String(), "err", err)
-
continue
-
}
-
-
scanner := bufio.NewScanner(strings.NewReader(data))
-
for scanner.Scan() {
-
select {
-
case <-ctx.Done():
-
return ctx.Err()
-
default:
-
msg := map[string]string{
-
"type": stream,
-
"data": scanner.Text(),
-
}
-
if err := conn.WriteJSON(msg); err != nil {
-
return err
-
}
+
if err := conn.WriteMessage(websocket.TextMessage, []byte(line.Text)); err != nil {
+
return fmt.Errorf("failed to write to websocket: %w", err)
}
}
-
-
if err := scanner.Err(); err != nil {
-
return fmt.Errorf("error scanning %s log: %w", stream, err)
-
}
}
-
-
return nil
}
func (s *Spindle) streamPipelines(conn *websocket.Conn, cursor *int64) error {