
spindle: add more debug logs

Signed-off-by: oppiliappan <me@oppi.li>

oppi.li b7fa3865 fe0f3329

verified
Changed files (+96 -32):
nix/vm.nix
spindle/engines/nixery/engine.go
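
The new log lines in engine.go all use the engine logger's structured key/value style (e.l.Info and e.l.Error with alternating field names and values). A minimal standalone sketch of that pattern, assuming a log/slog-style logger; the concrete type behind e.l isn't shown in this diff, and the field values here are made up:

package main

import (
	"log/slog"
	"os"
)

func main() {
	// Hypothetical stand-in for the engine's logger e.l.
	l := slog.New(slog.NewTextHandler(os.Stderr, nil))

	// Mirrors the "executing step" line added in engine.go below.
	l.Info("executing step",
		"workflow_id", "example-workflow-id", // made-up value
		"step_index", 0,
		"step_name", "build",
		"command", "nix build",
	)

	// Mirrors the failure log: fields are collected in a []any slice
	// and passed variadically, like logFields in handleStepFailure.
	logFields := []any{
		"exec_exit_code", 137,
		"container_oom_killed", true,
	}
	l.Error("workflow step failed!", logFields...)
}
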
nix/vm.nix (+6 -6)
···
guest.port = 6000;
}
# spindle
- {
- from = "host";
- host.port = 6555;
- guest.port = 6555;
- }
];
sharedDirectories = {
# We can't use the 9p mounts directly for most of these
···
};
};
services.tangled-spindle = {
- enable = true;
server = {
owner = envVar "TANGLED_VM_SPINDLE_OWNER";
hostname = "localhost:6555";
···
guest.port = 6000;
}
# spindle
+ # {
+ # from = "host";
+ # host.port = 6555;
+ # guest.port = 6555;
+ # }
];
sharedDirectories = {
# We can't use the 9p mounts directly for most of these
···
};
};
services.tangled-spindle = {
+ enable = false;
server = {
owner = envVar "TANGLED_VM_SPINDLE_OWNER";
hostname = "localhost:6555";
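
The vm.nix hunk above comments out what appears to be the host-to-guest forward for spindle's port 6555 and disables the tangled-spindle service inside the VM. A hypothetical host-side check (not part of this repo) that makes the forward's effect concrete: with the forward active the dial succeeds, with it commented out it fails.

package main

import (
	"fmt"
	"net"
	"time"
)

func main() {
	// Port 6555 matches the host.port / guest.port pair in the forward block.
	conn, err := net.DialTimeout("tcp", "localhost:6555", 2*time.Second)
	if err != nil {
		fmt.Println("spindle not reachable from the host:", err)
		return
	}
	defer conn.Close()
	fmt.Println("spindle reachable at", conn.RemoteAddr())
}
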
spindle/engines/nixery/engine.go (+90 -26)
···
func (e *Engine) SetupWorkflow(ctx context.Context, wid models.WorkflowId, wf *models.Workflow) error {
e.l.Info("setting up workflow", "workflow", wid)
- _, err := e.docker.NetworkCreate(ctx, networkName(wid), network.CreateOptions{
- Driver: "bridge",
- })
- if err != nil {
- return err
- }
- e.registerCleanup(wid, func(ctx context.Context) error {
- return e.docker.NetworkRemove(ctx, networkName(wid))
- })
-
addl := wf.Data.(addlFields)
reader, err := e.docker.ImagePull(ctx, addl.image, image.PullOptions{})
···
}
defer reader.Close()
io.Copy(os.Stdout, reader)
resp, err := e.docker.ContainerCreate(ctx, &container.Config{
Image: addl.image,
···
for _, s := range secrets {
workflowEnvs.AddEnv(s.Key, s.Value)
}
-
step := w.Steps[idx].(Step)
-
select {
case <-ctx.Done():
return ctx.Err()
default:
}
-
envs := append(EnvVars(nil), workflowEnvs...)
for k, v := range step.environment {
envs.AddEnv(k, v)
}
envs.AddEnv("HOME", homeDir)
mkExecResp, err := e.docker.ContainerExecCreate(ctx, addl.container, container.ExecOptions{
Cmd: []string{"bash", "-c", step.command},
···
select {
case <-tailDone:
-
case <-ctx.Done():
// cleanup will be handled by DestroyWorkflow, since
// Docker doesn't provide an API to kill an exec run
// (sure, we could grab the PID and kill it ourselves,
// but that's wasted effort)
e.l.Warn("step timed out", "step", step.Name)
-
<-tailDone
-
return engine.ErrTimedOut
}
···
default:
}
- execInspectResp, err := e.docker.ContainerExecInspect(ctx, mkExecResp.ID)
if err != nil {
return err
}
- if execInspectResp.ExitCode != 0 {
- inspectResp, err := e.docker.ContainerInspect(ctx, addl.container)
- if err != nil {
- return err
}
- e.l.Error("workflow failed!", "workflow_id", wid.String(), "exit_code", execInspectResp.ExitCode, "oom_killed", inspectResp.State.OOMKilled)
- if inspectResp.State.OOMKilled {
- return ErrOOMKilled
}
- return engine.ErrWorkflowFailed
}
- return nil
}
func (e *Engine) tailStep(ctx context.Context, wfLogger *models.WorkflowLogger, execID string, wid models.WorkflowId, stepIdx int, step models.Step) error {
···
func (e *Engine) SetupWorkflow(ctx context.Context, wid models.WorkflowId, wf *models.Workflow) error {
e.l.Info("setting up workflow", "workflow", wid)
addl := wf.Data.(addlFields)
reader, err := e.docker.ImagePull(ctx, addl.image, image.PullOptions{})
···
}
defer reader.Close()
io.Copy(os.Stdout, reader)
+
+ _, err = e.docker.NetworkCreate(ctx, networkName(wid), network.CreateOptions{
+ Driver: "bridge",
+ })
+ if err != nil {
+ return err
+ }
+ e.registerCleanup(wid, func(ctx context.Context) error {
+ return e.docker.NetworkRemove(ctx, networkName(wid))
+ })
resp, err := e.docker.ContainerCreate(ctx, &container.Config{
Image: addl.image,
···
for _, s := range secrets {
workflowEnvs.AddEnv(s.Key, s.Value)
}
step := w.Steps[idx].(Step)
select {
case <-ctx.Done():
return ctx.Err()
default:
}
envs := append(EnvVars(nil), workflowEnvs...)
for k, v := range step.environment {
envs.AddEnv(k, v)
}
envs.AddEnv("HOME", homeDir)
+
+ e.l.Info("executing step",
+ "workflow_id", wid.String(),
+ "step_index", idx,
+ "step_name", step.Name,
+ "command", step.command,
+ )
mkExecResp, err := e.docker.ContainerExecCreate(ctx, addl.container, container.ExecOptions{
Cmd: []string{"bash", "-c", step.command},
···
select {
case <-tailDone:
case <-ctx.Done():
// cleanup will be handled by DestroyWorkflow, since
// Docker doesn't provide an API to kill an exec run
// (sure, we could grab the PID and kill it ourselves,
// but that's wasted effort)
e.l.Warn("step timed out", "step", step.Name)
<-tailDone
return engine.ErrTimedOut
}
···
default:
}
+ if err = e.handleStepFailure(ctx, wid, w, idx, mkExecResp.ID); err != nil {
+ return err
+ }
+
+ e.l.Info("step completed successfully",
+ "workflow_id", wid.String(),
+ "step_index", idx,
+ "step_name", step.Name,
+ )
+
+ return nil
+ }
+
+ // handleStepFailure logs detailed information about a failed workflow step
+ func (e *Engine) handleStepFailure(
+ ctx context.Context,
+ wid models.WorkflowId,
+ w *models.Workflow,
+ idx int,
+ execID string,
+ ) error {
+ addl := w.Data.(addlFields)
+ step := w.Steps[idx].(Step)
+
+ inspectResp, err := e.docker.ContainerInspect(ctx, addl.container)
if err != nil {
return err
}
+ execInspectResp, err := e.docker.ContainerExecInspect(ctx, execID)
+ if err != nil {
+ return err
+ }
+
+ // no error
+ if execInspectResp.ExitCode == 0 {
+ return nil
+ }
+
+ logFields := []any{
+ "workflow_id", wid.String(),
+ "step_index", idx,
+ "step_name", step.Name,
+ "command", step.command,
+ "container_exit_code", inspectResp.State.ExitCode,
+ "container_oom_killed", inspectResp.State.OOMKilled,
+ "exec_exit_code", execInspectResp.ExitCode,
+ }
+
+ // Add container state information
+ if inspectResp.State != nil {
+ logFields = append(logFields,
+ "container_status", inspectResp.State.Status,
+ "container_running", inspectResp.State.Running,
+ "container_paused", inspectResp.State.Paused,
+ "container_restarting", inspectResp.State.Restarting,
+ "container_dead", inspectResp.State.Dead,
+ )
+
+ if inspectResp.State.Error != "" {
+ logFields = append(logFields, "container_error", inspectResp.State.Error)
}
+ if inspectResp.State.StartedAt != "" {
+ logFields = append(logFields, "container_started_at", inspectResp.State.StartedAt)
+ }
+ if inspectResp.State.FinishedAt != "" {
+ logFields = append(logFields, "container_finished_at", inspectResp.State.FinishedAt)
}
}
+ // Add resource usage if available
+ if inspectResp.HostConfig != nil && inspectResp.HostConfig.Memory > 0 {
+ logFields = append(logFields, "memory_limit", inspectResp.HostConfig.Memory)
+ }
+
+ e.l.Error("workflow step failed!", logFields...)
+
+ if inspectResp.State.OOMKilled {
+ return ErrOOMKilled
+ }
+ return engine.ErrWorkflowFailed
}
func (e *Engine) tailStep(ctx context.Context, wfLogger *models.WorkflowLogger, execID string, wid models.WorkflowId, stepIdx int, step models.Step) error {
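
Taken together, the new failure path boils down to: inspect the exec for its exit code, inspect the container to see whether it was OOM-killed, log both, and only then decide which error to return. A rough standalone sketch of that flow against the Docker Go SDK; classifyStepFailure and the two error values are placeholders rather than the spindle API, and the real handleStepFailure above inspects the container up front and records extra container-state and memory-limit fields.

package sketch

import (
	"context"
	"errors"
	"log/slog"

	"github.com/docker/docker/client"
)

// Placeholders standing in for spindle's ErrOOMKilled and engine.ErrWorkflowFailed.
var (
	errOOMKilled      = errors.New("step was OOM-killed")
	errWorkflowFailed = errors.New("step failed")
)

// classifyStepFailure mirrors the shape of handleStepFailure above:
// a zero exec exit code means success; otherwise the container is
// inspected so OOM kills can be reported as a distinct error.
func classifyStepFailure(ctx context.Context, cli *client.Client, l *slog.Logger, containerID, execID string) error {
	execInfo, err := cli.ContainerExecInspect(ctx, execID)
	if err != nil {
		return err
	}
	if execInfo.ExitCode == 0 {
		return nil // the step succeeded, nothing to log
	}

	ctrInfo, err := cli.ContainerInspect(ctx, containerID)
	if err != nil {
		return err
	}

	l.Error("workflow step failed!",
		"exec_exit_code", execInfo.ExitCode,
		"container_oom_killed", ctrInfo.State.OOMKilled,
	)
	if ctrInfo.State.OOMKilled {
		return errOOMKilled
	}
	return errWorkflowFailed
}
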