@@ -7,6 +7,7 @@ import (
    "io"
    "regexp"
    "strings"
    "sync"
    "time"

    "github.com/docker/docker/api/types/container"
@@ -27,19 +28,19 @@ type SimpleDockerRuntime struct {
func NewSimpleDockerRuntime(logger *zap.Logger) (*SimpleDockerRuntime, error) {
    var cli *client.Client
    var err error

    // Try different socket paths with ping test
    socketPaths := []string{
        "unix:///run/user/1000/podman/podman.sock", // Podman socket (mounted from host)
        "unix:///var/run/docker.sock",              // Standard Docker socket
    }
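    // Note (added assumption, not part of the original change): the rootless Podman
    // path above hard-codes UID 1000; on other hosts the socket usually lives at
    // /run/user/$UID/podman/podman.sock. The environment fallback further below can
    // presumably be pointed at a different daemon via DOCKER_HOST if neither listed
    // socket responds.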

    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    for _, socketPath := range socketPaths {
        logger.Info("Attempting to connect to socket", zap.String("path", socketPath))

        cli, err = client.NewClientWithOpts(
            client.WithHost(socketPath),
            client.WithAPIVersionNegotiation(),
@@ -54,11 +55,11 @@ func NewSimpleDockerRuntime(logger *zap.Logger) (*SimpleDockerRuntime, error) {
            logger.Warn("Failed to ping daemon", zap.String("path", socketPath), zap.Error(err))
            continue
        }

        logger.Info("Successfully connected to Docker/Podman", zap.String("path", socketPath))
        break
    }

    // Final fallback to environment
    if cli == nil {
        logger.Info("Trying default Docker environment")
@@ -66,12 +67,12 @@ func NewSimpleDockerRuntime(logger *zap.Logger) (*SimpleDockerRuntime, error) {
        if err != nil {
            return nil, fmt.Errorf("failed to create Docker client: %w", err)
        }

        if _, err := cli.Ping(ctx); err != nil {
            return nil, fmt.Errorf("failed to ping Docker/Podman daemon: %w", err)
        }
    }

    if cli == nil {
        return nil, fmt.Errorf("no working Docker/Podman socket found")
    }
@@ -83,13 +84,26 @@ func NewSimpleDockerRuntime(logger *zap.Logger) (*SimpleDockerRuntime, error) {
}

func (s *SimpleDockerRuntime) Execute(ctx context.Context, function *domain.FunctionDefinition, input json.RawMessage) (*domain.ExecutionResult, error) {
    return s.ExecuteWithLogStreaming(ctx, function, input, nil)
}

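// Usage sketch (added illustration, not part of the original commit): based on
// the call sites below, runtime.LogStreamCallback is assumed to be roughly
// func(lines []string) error. A hypothetical caller could forward logs to its
// own sink like this (rt, fn, and input are placeholder names):
//
//   _, _ = rt.ExecuteWithLogStreaming(ctx, fn, input, func(lines []string) error {
//       fmt.Printf("have %d log lines so far\n", len(lines))
//       return nil
//   })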
func (s *SimpleDockerRuntime) ExecuteWithLogStreaming(ctx context.Context, function *domain.FunctionDefinition, input json.RawMessage, logCallback runtime.LogStreamCallback) (*domain.ExecutionResult, error) {
    startTime := time.Now()

    s.logger.Info("Starting ExecuteWithLogStreaming",
        zap.String("function_id", function.ID.String()),
        zap.String("function_name", function.Name),
        zap.Bool("has_log_callback", logCallback != nil))

    // Create container
    containerID, err := s.createContainer(ctx, function, input)
    if err != nil {
        return nil, fmt.Errorf("failed to create container: %w", err)
    }

    s.logger.Debug("Container created successfully",
        zap.String("container_id", containerID),
        zap.String("function_id", function.ID.String()))

    // Start container
    if err := s.client.ContainerStart(ctx, containerID, container.StartOptions{}); err != nil {
@@ -97,23 +111,129 @@ func (s *SimpleDockerRuntime) Execute(ctx context.Context, function *domain.Func
        return nil, fmt.Errorf("failed to start container: %w", err)
    }

    // Create channels for log streaming
    logChan := make(chan string, 1000) // Buffer for logs
    doneChan := make(chan struct{})    // Signal to stop streaming

    // Start log streaming in a goroutine
    s.logger.Debug("Starting log streaming goroutine",
        zap.String("container_id", containerID),
        zap.String("function_id", function.ID.String()))
    go s.streamContainerLogs(context.Background(), containerID, logChan, doneChan)

    // Create timeout context based on function timeout
    var timeoutCtx context.Context
    var cancel context.CancelFunc
    if function.Timeout.Duration > 0 {
        timeoutCtx, cancel = context.WithTimeout(ctx, function.Timeout.Duration)
        defer cancel()
        s.logger.Debug("Set execution timeout",
            zap.Duration("timeout", function.Timeout.Duration),
            zap.String("container_id", containerID))
    } else {
        timeoutCtx = ctx
        s.logger.Debug("No execution timeout set",
            zap.String("container_id", containerID))
    }

    // For streaming logs, collect logs in a separate goroutine and call the callback
    var streamedLogs []string
    logsMutex := &sync.Mutex{}

    if logCallback != nil {
        s.logger.Info("Starting log callback goroutine",
            zap.String("container_id", containerID))
        go func() {
            // Keep track of the last time we called the callback to avoid too frequent updates
            lastUpdate := time.Now()
            ticker := time.NewTicker(1 * time.Second) // Update at most once per second
            defer ticker.Stop()

            for {
                select {
                case log, ok := <-logChan:
                    if !ok {
                        // Channel closed, exit the goroutine
                        s.logger.Debug("Log channel closed, exiting callback goroutine",
                            zap.String("container_id", containerID))
                        return
                    }

                    s.logger.Debug("Received log line from channel",
                        zap.String("container_id", containerID),
                        zap.String("log_line", log))

                    logsMutex.Lock()
                    streamedLogs = append(streamedLogs, log)
                    shouldUpdate := time.Since(lastUpdate) >= 1*time.Second
                    currentLogCount := len(streamedLogs)
                    logsMutex.Unlock()

                    // Call the callback if it's been at least 1 second since last update
                    if shouldUpdate {
                        logsMutex.Lock()
                        logsCopy := make([]string, len(streamedLogs))
                        copy(logsCopy, streamedLogs)
                        logsMutex.Unlock()

                        s.logger.Info("Calling log callback with accumulated logs",
                            zap.String("container_id", containerID),
                            zap.Int("log_count", len(logsCopy)))

                        // Call the callback with the current logs
                        if err := logCallback(logsCopy); err != nil {
                            s.logger.Error("Failed to stream logs to callback",
                                zap.String("container_id", containerID),
                                zap.Error(err))
                        }
                        lastUpdate = time.Now()
                    } else {
                        s.logger.Debug("Skipping callback update (too frequent)",
                            zap.String("container_id", containerID),
                            zap.Int("current_log_count", currentLogCount),
                            zap.Duration("time_since_last_update", time.Since(lastUpdate)))
                    }
                case <-ticker.C:
                    // Periodic update to ensure logs are streamed even if no new logs arrive
                    logsMutex.Lock()
                    if len(streamedLogs) > 0 && time.Since(lastUpdate) >= 1*time.Second {
                        logsCopy := make([]string, len(streamedLogs))
                        copy(logsCopy, streamedLogs)
                        logCount := len(logsCopy)
                        logsMutex.Unlock()

                        s.logger.Debug("Periodic callback update triggered",
                            zap.String("container_id", containerID),
                            zap.Int("log_count", logCount))

                        // Call the callback with the current logs
                        if err := logCallback(logsCopy); err != nil {
                            s.logger.Error("Failed to stream logs to callback (periodic)",
                                zap.String("container_id", containerID),
                                zap.Error(err))
                        }
                        lastUpdate = time.Now()
                    } else {
                        logsMutex.Unlock()
                        s.logger.Debug("Skipping periodic callback (no logs or too frequent)",
                            zap.String("container_id", containerID),
                            zap.Duration("time_since_last_update", time.Since(lastUpdate)))
                    }
                }
            }
        }()
    } else {
        s.logger.Debug("No log callback provided, logs will be collected at the end",
            zap.String("container_id", containerID))
    }
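    // Note (added observation): each callback invocation above receives a snapshot
    // of every log line accumulated so far, not just the new lines, and calls are
    // rate-limited to roughly one per second via lastUpdate and the ticker, so a
    // consumer should replace its view of the logs rather than append to it.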

    // Wait for container to finish with timeout
    statusCh, errCh := s.client.ContainerWait(timeoutCtx, containerID, container.WaitConditionNotRunning)

    var timedOut bool
    select {
    case err := <-errCh:
        close(doneChan) // Stop log streaming
        s.cleanupContainer(ctx, containerID)
        return nil, fmt.Errorf("error waiting for container: %w", err)
    case <-statusCh:
@@ -121,19 +241,20 @@ func (s *SimpleDockerRuntime) Execute(ctx context.Context, function *domain.Func
    case <-timeoutCtx.Done():
        // Timeout occurred
        timedOut = true

        // doneChan will be closed below in the common cleanup

        // Stop the container in the background - don't wait for it to complete
        go func() {
            // Use a very short timeout for stopping, then kill if needed
            if err := s.client.ContainerStop(context.Background(), containerID, container.StopOptions{
                Timeout: &[]int{1}[0], // Only 1 second grace period for stop
            }); err != nil {
                s.logger.Warn("Failed to stop timed out container gracefully, attempting to kill",
                    zap.String("container_id", containerID),
                    zap.Error(err))
                // If stop fails, try to kill it immediately
                if killErr := s.client.ContainerKill(context.Background(), containerID, "SIGKILL"); killErr != nil {
                    s.logger.Error("Failed to kill timed out container",
                        zap.String("container_id", containerID),
                        zap.Error(killErr))
                }
@@ -141,21 +262,67 @@ func (s *SimpleDockerRuntime) Execute(ctx context.Context, function *domain.Func
        }()
    }

    // Collect all streamed logs
    var logs []string
    var stats *container.InspectResponse

    // For timed-out containers, skip log retrieval and inspection to return quickly
    if timedOut {
        logs = []string{"Container execution timed out"}
    } else {
        // Get container logs
        var err error
        logs, err = s.getContainerLogs(ctx, containerID)
        if err != nil {
            s.logger.Warn("Failed to get container logs", zap.Error(err))
            logs = []string{"Failed to retrieve logs"}
    if !timedOut {
        // Collect any remaining logs from the channel
        close(doneChan) // Stop log streaming

        // Give a moment for final logs to be processed
        time.Sleep(100 * time.Millisecond)

        if logCallback == nil {
            // If no callback, collect all logs at the end
            for log := range logChan {
                logs = append(logs, log)
            }
        } else {
            // If we have a callback, use the streamed logs plus any remaining in channel
            logsMutex.Lock()
            logs = make([]string, len(streamedLogs))
            copy(logs, streamedLogs)
            logsMutex.Unlock()

            // Collect any remaining logs in the channel
            remainingLogs := make([]string, 0)
            for {
                select {
                case log := <-logChan:
                    remainingLogs = append(remainingLogs, log)
                default:
                    goto done
                }
            }
        done:
            logs = append(logs, remainingLogs...)
        }
    } else {
        logs = []string{"Container execution timed out"}
    }

    var stats *container.InspectResponse

    // For timed-out containers, still try to collect logs but with a short timeout
    if timedOut {
        // Collect any remaining logs from the channel before adding timeout message
        // doneChan was already closed above
        if logCallback == nil {
            // If no callback was used, try to collect logs directly but with short timeout
            logCtx, logCancel := context.WithTimeout(context.Background(), 2*time.Second)
            finalLogs, err := s.getContainerLogs(logCtx, containerID)
            logCancel()
            if err == nil {
                logs = finalLogs
            }
        } else {
            // If callback was used, use the streamed logs
            logsMutex.Lock()
            logs = make([]string, len(streamedLogs))
            copy(logs, streamedLogs)
            logsMutex.Unlock()
        }
        logs = append(logs, "Container execution timed out")
    } else {
        // Get container stats
        statsResponse, err := s.client.ContainerInspect(ctx, containerID)
        if err != nil {
@@ -418,7 +585,7 @@ func (s *SimpleDockerRuntime) getContainerLogs(ctx context.Context, containerID

    // Parse Docker logs to remove binary headers
    rawOutput := parseDockerLogs(logData)

    // Parse the XML-tagged output to extract logs
    parsedLogs, _, err := s.parseContainerOutput(rawOutput)
    if err != nil {
@@ -467,7 +634,7 @@ func (s *SimpleDockerRuntime) getContainerOutput(ctx context.Context, containerI

    // Parse Docker logs to remove binary headers
    rawOutput := parseDockerLogs(logData)

    // Parse the XML-tagged output to extract the result
    _, result, err := s.parseContainerOutput(rawOutput)
    if err != nil {
@@ -479,8 +646,8 @@ func (s *SimpleDockerRuntime) getContainerOutput(ctx context.Context, containerI
    } else {
        // Return the output wrapped in a JSON object
        fallbackResult := map[string]interface{}{
            "result":    "Function executed successfully",
            "output":    logContent,
            "timestamp": time.Now().UTC(),
        }
        resultJSON, _ := json.Marshal(fallbackResult)
@@ -491,8 +658,8 @@ func (s *SimpleDockerRuntime) getContainerOutput(ctx context.Context, containerI
    // If no result was found in XML tags, provide a default success result
    if result == nil {
        defaultResult := map[string]interface{}{
            "result":    "Function executed successfully",
            "message":   "No result output found",
            "timestamp": time.Now().UTC(),
        }
        resultJSON, _ := json.Marshal(defaultResult)
@@ -505,7 +672,7 @@ func (s *SimpleDockerRuntime) getContainerOutput(ctx context.Context, containerI
// parseDockerLogs parses Docker log output which includes 8-byte headers
func parseDockerLogs(logData []byte) string {
    var cleanOutput strings.Builder

    for len(logData) > 8 {
        // Docker log header: [STREAM_TYPE, 0, 0, 0, SIZE1, SIZE2, SIZE3, SIZE4]
        // Skip the first 8 bytes (header)
@@ -513,25 +680,25 @@ func parseDockerLogs(logData []byte) string {
        if len(logData) < headerSize {
            break
        }

        // Extract size from bytes 4-7 (big endian)
        size := int(logData[4])<<24 + int(logData[5])<<16 + int(logData[6])<<8 + int(logData[7])
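        // Illustrative example (added, not from the original source): a stdout frame
        // carrying "hello\n" arrives as
        //   [0x01, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x06,  'h' 'e' 'l' 'l' 'o' '\n']
        // where byte 0 is the stream type (1 = stdout, 2 = stderr) and bytes 4-7 hold
        // the big-endian payload length (6 here); the payload follows the header.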

        if len(logData) < headerSize+size {
            // If the remaining data is less than expected size, take what we have
            size = len(logData) - headerSize
        }

        if size > 0 {
            // Extract the actual log content
            content := string(logData[headerSize : headerSize+size])
            cleanOutput.WriteString(content)
        }

        // Move to next log entry
        logData = logData[headerSize+size:]
    }

    return cleanOutput.String()
}

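// Illustrative note (assumed output shape, inferred from the tags handled in
// parseContainerOutput and streamContainerLogs below): a function container is
// expected to wrap what it prints in XML-style markers, roughly
//
//   <stdout>
//   processing record 1
//   </stdout>
//   <result>
//   {"status":"ok"}
//   </result>
//
// so that log lines and the JSON result can be separated after execution.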
@@ -581,7 +748,7 @@ func (s *SimpleDockerRuntime) parseContainerOutput(rawOutput string) (logs []str
    // Remove any XML tags from the output for fallback
    cleanOutput := regexp.MustCompile(`(?s)<[^>]*>`).ReplaceAllString(rawOutput, "")
    cleanOutput = strings.TrimSpace(cleanOutput)

    if cleanOutput != "" {
        if json.Valid([]byte(cleanOutput)) {
            result = json.RawMessage(cleanOutput)
@@ -599,6 +766,130 @@ func (s *SimpleDockerRuntime) parseContainerOutput(rawOutput string) (logs []str
    return logs, result, nil
}

// streamContainerLogs streams logs from a running container and sends them to a channel
func (s *SimpleDockerRuntime) streamContainerLogs(ctx context.Context, containerID string, logChan chan<- string, doneChan <-chan struct{}) {
    defer close(logChan)

    s.logger.Info("Starting container log streaming",
        zap.String("container_id", containerID))

    // Get container logs with follow option
    logs, err := s.client.ContainerLogs(ctx, containerID, container.LogsOptions{
        ShowStdout: true,
        ShowStderr: true,
        Follow:     true,
        Timestamps: false,
    })
    if err != nil {
        s.logger.Error("Failed to get container logs for streaming",
            zap.String("container_id", containerID),
            zap.Error(err))
        return
    }
    defer logs.Close()

    s.logger.Debug("Successfully got container logs stream",
        zap.String("container_id", containerID))

    // Create a context that cancels when doneChan receives a signal
    streamCtx, cancel := context.WithCancel(ctx)
    defer cancel()

    // Goroutine to listen for done signal
    go func() {
        select {
        case <-doneChan:
            cancel()
        case <-streamCtx.Done():
        }
    }()

    // Buffer for reading log data
    buf := make([]byte, 4096)

    // Continue reading until context is cancelled or EOF
    totalLogLines := 0
    for {
        select {
        case <-streamCtx.Done():
            s.logger.Debug("Stream context cancelled, stopping log streaming",
                zap.String("container_id", containerID),
                zap.Int("total_lines_streamed", totalLogLines))
            return
        default:
            n, err := logs.Read(buf)
            if n > 0 {
                s.logger.Debug("Read log data from container",
                    zap.String("container_id", containerID),
                    zap.Int("bytes_read", n))

                // Parse Docker logs to remove binary headers
                logData := buf[:n]
                rawOutput := parseDockerLogs(logData)

                // Send each line to the log channel, filtering out XML tags
                lines := strings.Split(rawOutput, "\n")
                for _, line := range lines {
                    trimmedLine := strings.TrimSpace(line)
                    // Skip empty lines and XML tags
                    if trimmedLine != "" &&
                        !strings.HasPrefix(trimmedLine, "<stdout>") &&
                        !strings.HasPrefix(trimmedLine, "</stdout>") &&
                        !strings.HasPrefix(trimmedLine, "<result>") &&
                        !strings.HasPrefix(trimmedLine, "</result>") &&
                        trimmedLine != "<stdout>" &&
                        trimmedLine != "</stdout>" &&
                        trimmedLine != "<result>" &&
                        trimmedLine != "</result>" {

                        totalLogLines++
                        s.logger.Debug("Sending filtered log line to channel",
                            zap.String("container_id", containerID),
                            zap.String("log_line", trimmedLine),
                            zap.Int("total_lines", totalLogLines))

                        select {
                        case logChan <- trimmedLine:
                            s.logger.Debug("Successfully sent filtered log line to channel",
                                zap.String("container_id", containerID))
                        case <-streamCtx.Done():
                            s.logger.Debug("Stream context cancelled while sending log line",
                                zap.String("container_id", containerID))
                            return
                        default:
                            // Log buffer is full, warn but continue reading to avoid blocking
                            s.logger.Warn("Log buffer full, dropping log line",
                                zap.String("container_id", containerID),
                                zap.String("dropped_line", trimmedLine))
                        }
                    } else if trimmedLine != "" {
                        s.logger.Debug("Filtered out XML tag",
                            zap.String("container_id", containerID),
                            zap.String("filtered_line", trimmedLine))
                    }
                }
            }

            if err != nil {
                if err == io.EOF {
                    s.logger.Debug("Got EOF from container logs, container might still be running",
                        zap.String("container_id", containerID),
                        zap.Int("total_lines_streamed", totalLogLines))
                    // Container might still be running, continue reading
                    time.Sleep(100 * time.Millisecond)
                    continue
                } else {
                    s.logger.Error("Error reading container logs",
                        zap.String("container_id", containerID),
                        zap.Error(err),
                        zap.Int("total_lines_streamed", totalLogLines))
                    return
                }
            }
        }
    }
}

func (s *SimpleDockerRuntime) cleanupContainer(ctx context.Context, containerID string) {
    // Remove container
    if err := s.client.ContainerRemove(ctx, containerID, container.RemoveOptions{