skybridge/faas/internal/runtime/docker/simple.go

package docker

import (
    "context"
    "encoding/json"
    "fmt"
    "io"
    "strings"
    "time"

    "github.com/docker/docker/api/types/container"
    "github.com/docker/docker/api/types/image"
    "github.com/docker/docker/client"
    "github.com/docker/docker/pkg/stdcopy"
    "github.com/google/uuid"
    "go.uber.org/zap"

    "github.com/RyanCopley/skybridge/faas/internal/domain"
    "github.com/RyanCopley/skybridge/faas/internal/runtime"
)
type SimpleDockerRuntime struct {
logger *zap.Logger
client *client.Client
}
func NewSimpleDockerRuntime(logger *zap.Logger) (*SimpleDockerRuntime, error) {
var cli *client.Client
var err error
// Try different socket paths with ping test
socketPaths := []string{
"unix:///run/user/1000/podman/podman.sock", // Podman socket (mounted from host)
"unix:///var/run/docker.sock", // Standard Docker socket
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
for _, socketPath := range socketPaths {
logger.Info("Attempting to connect to socket", zap.String("path", socketPath))
cli, err = client.NewClientWithOpts(
client.WithHost(socketPath),
client.WithAPIVersionNegotiation(),
)
if err != nil {
logger.Warn("Failed to create client", zap.String("path", socketPath), zap.Error(err))
continue
}
        // Test the connection; a client that cannot ping is useless, so
        // reset cli before trying the next candidate socket. Without this,
        // a failed ping would leave a stale non-nil client behind and the
        // environment fallback below would never run.
        if _, err := cli.Ping(ctx); err != nil {
            logger.Warn("Failed to ping daemon", zap.String("path", socketPath), zap.Error(err))
            cli = nil
            continue
        }
logger.Info("Successfully connected to Docker/Podman", zap.String("path", socketPath))
break
}
// Final fallback to environment
if cli == nil {
logger.Info("Trying default Docker environment")
cli, err = client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
if err != nil {
return nil, fmt.Errorf("failed to create Docker client: %w", err)
}
if _, err := cli.Ping(ctx); err != nil {
return nil, fmt.Errorf("failed to ping Docker/Podman daemon: %w", err)
}
}
return &SimpleDockerRuntime{
logger: logger,
client: cli,
}, nil
}
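
// Illustrative usage (a sketch, not part of the runtime wiring; assumes a
// zap logger has been configured elsewhere):
//
//	logger, _ := zap.NewProduction()
//	rt, err := NewSimpleDockerRuntime(logger)
//	if err != nil {
//		panic(err)
//	}
//	if err := rt.HealthCheck(context.Background()); err != nil {
//		panic(err)
//	}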
func (s *SimpleDockerRuntime) Execute(ctx context.Context, function *domain.FunctionDefinition, input json.RawMessage) (*domain.ExecutionResult, error) {
startTime := time.Now()
s.logger.Info("Executing function in Docker container",
zap.String("function_id", function.ID.String()),
zap.String("name", function.Name),
zap.String("image", function.Image))
// Create container
containerID, err := s.createContainer(ctx, function, input)
if err != nil {
return nil, fmt.Errorf("failed to create container: %w", err)
}
// Start container
if err := s.client.ContainerStart(ctx, containerID, container.StartOptions{}); err != nil {
s.cleanupContainer(ctx, containerID)
return nil, fmt.Errorf("failed to start container: %w", err)
}
    // Wait for the container to exit; errCh carries transport errors,
    // statusCh carries the exit status
    statusCh, errCh := s.client.ContainerWait(ctx, containerID, container.WaitConditionNotRunning)
    select {
    case err := <-errCh:
        if err != nil {
            s.cleanupContainer(ctx, containerID)
            return nil, fmt.Errorf("error waiting for container: %w", err)
        }
    case <-statusCh:
        // Container finished
    }
// Get container logs
logs, err := s.getContainerLogs(ctx, containerID)
if err != nil {
s.logger.Warn("Failed to get container logs", zap.Error(err))
logs = []string{"Failed to retrieve logs"}
}
    // Inspect the container for its exit state
    stats, err := s.client.ContainerInspect(ctx, containerID)
    if err != nil {
        s.logger.Warn("Failed to inspect container", zap.Error(err))
    }
    // Build the execution result; record the duration even if the inspect
    // call failed
    result := &domain.ExecutionResult{
        Logs:     logs,
        Duration: time.Since(startTime).Truncate(time.Millisecond),
    }
    // Use the exit code to decide between output and error
    if stats.State != nil {
        if stats.State.ExitCode == 0 {
            // Try to read the function's output from the container
            output, err := s.getContainerOutput(ctx, containerID)
            if err != nil {
                s.logger.Warn("Failed to get container output", zap.Error(err))
                result.Output = json.RawMessage(`{"error": "Failed to retrieve output"}`)
            } else {
                result.Output = output
            }
        } else {
            result.Error = fmt.Sprintf("Container exited with code %d", stats.State.ExitCode)
        }
    }
// Cleanup container
s.cleanupContainer(ctx, containerID)
return result, nil
}
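
// executeWithTimeout is a minimal sketch (it is not called anywhere in this
// file) of how a caller could enforce function.Timeout, which Execute does
// not do on its own: wrapping the context lets ContainerWait unblock when
// the deadline expires, after which Execute returns and attempts cleanup.
func (s *SimpleDockerRuntime) executeWithTimeout(ctx context.Context, function *domain.FunctionDefinition, input json.RawMessage) (*domain.ExecutionResult, error) {
    if function.Timeout.Duration > 0 {
        var cancel context.CancelFunc
        ctx, cancel = context.WithTimeout(ctx, function.Timeout.Duration)
        defer cancel()
    }
    return s.Execute(ctx, function, input)
}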
func (s *SimpleDockerRuntime) Deploy(ctx context.Context, function *domain.FunctionDefinition) error {
s.logger.Info("Deploying function image",
zap.String("function_id", function.ID.String()),
zap.String("image", function.Image))
// Pull the image if it doesn't exist
_, _, err := s.client.ImageInspectWithRaw(ctx, function.Image)
if err != nil {
// Image doesn't exist, try to pull it
s.logger.Info("Pulling image", zap.String("image", function.Image))
reader, err := s.client.ImagePull(ctx, function.Image, image.PullOptions{})
if err != nil {
return fmt.Errorf("failed to pull image %s: %w", function.Image, err)
}
defer reader.Close()
        // Drain the pull stream so the pull runs to completion; see
        // drainPullProgress below for a sketch of actually decoding it
        if _, err := io.Copy(io.Discard, reader); err != nil {
            return fmt.Errorf("failed to read image pull response: %w", err)
        }
}
return nil
}
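
// drainPullProgress is a sketch of how the pull stream could be decoded
// instead of merely drained: the daemon emits a sequence of JSON progress
// messages, and a decode loop surfaces pull failures that a raw read
// discards. The message shape here is a minimal assumed subset.
func drainPullProgress(r io.Reader) error {
    dec := json.NewDecoder(r)
    for {
        var msg struct {
            Status string `json:"status"`
            Error  string `json:"error"`
        }
        if err := dec.Decode(&msg); err != nil {
            if err == io.EOF {
                return nil
            }
            return fmt.Errorf("failed to decode pull progress: %w", err)
        }
        if msg.Error != "" {
            return fmt.Errorf("image pull failed: %s", msg.Error)
        }
    }
}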
func (s *SimpleDockerRuntime) Remove(ctx context.Context, functionID uuid.UUID) error {
s.logger.Info("Removing function resources", zap.String("function_id", functionID.String()))
// In a real implementation, we would remove any function-specific resources
// For now, we don't need to do anything as containers are cleaned up after execution
return nil
}
func (s *SimpleDockerRuntime) GetLogs(ctx context.Context, executionID uuid.UUID) ([]string, error) {
// In a real implementation, we would need to store container IDs associated with execution IDs
// For now, we'll return a placeholder
return []string{
"Function execution logs would appear here",
"In a full implementation, these would be retrieved from the Docker container",
}, nil
}
func (s *SimpleDockerRuntime) HealthCheck(ctx context.Context) error {
_, err := s.client.Ping(ctx)
return err
}
func (s *SimpleDockerRuntime) GetInfo(ctx context.Context) (*runtime.RuntimeInfo, error) {
info, err := s.client.Info(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get Docker info: %w", err)
}
return &runtime.RuntimeInfo{
Type: "docker",
Version: info.ServerVersion,
Available: true,
Endpoint: s.client.DaemonHost(),
Metadata: map[string]string{
"containers": fmt.Sprintf("%d", info.Containers),
"images": fmt.Sprintf("%d", info.Images),
"docker_root_dir": info.DockerRootDir,
},
}, nil
}
func (s *SimpleDockerRuntime) ListContainers(ctx context.Context) ([]runtime.ContainerInfo, error) {
containers, err := s.client.ContainerList(ctx, container.ListOptions{})
if err != nil {
return nil, fmt.Errorf("failed to list containers: %w", err)
}
    var containerInfos []runtime.ContainerInfo
    for _, c := range containers {
        containerInfo := runtime.ContainerInfo{
            ID:     c.ID,
            Status: c.State,
            Image:  c.Image,
        }
        // Prefer the human-readable name over the raw ID when one exists;
        // the API prefixes names with "/", so strip it
        if len(c.Names) > 0 {
            containerInfo.ID = strings.TrimPrefix(c.Names[0], "/")
        }
        containerInfos = append(containerInfos, containerInfo)
    }
return containerInfos, nil
}
func (s *SimpleDockerRuntime) StopExecution(ctx context.Context, executionID uuid.UUID) error {
s.logger.Info("Stopping execution", zap.String("execution_id", executionID.String()))
// In a real implementation, we would need to map execution IDs to container IDs
// For now, we'll just log that this was called
return nil
}
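
// A sketch of the execution-to-container bookkeeping that both GetLogs and
// StopExecution note is missing. The sync.Map and executionID plumbing are
// assumptions, not part of the current implementation: Execute would record
// the mapping once the container exists and remove it during cleanup.
//
//	var executionContainers sync.Map // uuid.UUID -> container ID
//
//	// in Execute, after createContainer succeeds:
//	executionContainers.Store(executionID, containerID)
//
//	// in StopExecution:
//	if id, ok := executionContainers.Load(executionID); ok {
//		return s.client.ContainerStop(ctx, id.(string), container.StopOptions{})
//	}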
// Helper methods
func (s *SimpleDockerRuntime) createContainer(ctx context.Context, function *domain.FunctionDefinition, input json.RawMessage) (string, error) {
// Prepare environment variables
env := []string{}
for key, value := range function.Environment {
env = append(env, fmt.Sprintf("%s=%s", key, value))
}
// Add input as environment variable
inputStr := string(input)
if inputStr != "" {
env = append(env, fmt.Sprintf("FUNCTION_INPUT=%s", inputStr))
}
// Add function code as environment variable for dynamic languages
env = append(env, fmt.Sprintf("FUNCTION_CODE=%s", function.Code))
env = append(env, fmt.Sprintf("FUNCTION_HANDLER=%s", function.Handler))
// Create container config with proper command for runtime
config := &container.Config{
Image: function.Image,
Env: env,
AttachStdout: true,
AttachStderr: true,
}
// Set command based on runtime
switch function.Runtime {
case "nodejs", "nodejs18", "nodejs20":
config.Cmd = []string{"sh", "-c", `
echo "$FUNCTION_CODE" > /tmp/index.js &&
echo "const handler = require('/tmp/index.js').handler;
const input = process.env.FUNCTION_INPUT ? JSON.parse(process.env.FUNCTION_INPUT) : {};
const context = { functionName: '` + function.Name + `' };
handler(input, context).then(result => console.log(JSON.stringify(result))).catch(err => { console.error(err); process.exit(1); });" > /tmp/runner.js &&
node /tmp/runner.js
`}
case "python", "python3", "python3.9", "python3.10", "python3.11":
config.Cmd = []string{"sh", "-c", `
echo "$FUNCTION_CODE" > /tmp/handler.py &&
echo "import json, os, sys; sys.path.insert(0, '/tmp'); from handler import handler;
input_data = json.loads(os.environ.get('FUNCTION_INPUT', '{}'));
context = {'function_name': '` + function.Name + `'};
result = handler(input_data, context);
print(json.dumps(result))" > /tmp/runner.py &&
python /tmp/runner.py
`}
default:
// For other runtimes, assume they handle execution themselves
// This is for pre-built container images
}
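    // Note: echo "$FUNCTION_CODE" is fragile; echo may interpret backslashes
    // or a leading "-", so printf '%s\n' "$FUNCTION_CODE" would be a safer
    // write. Interpolating function.Name straight into the script also
    // assumes the name contains no quotes or shell metacharacters.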
// Create host config with resource limits
hostConfig := &container.HostConfig{
Resources: container.Resources{
Memory: int64(function.Memory) * 1024 * 1024, // Convert MB to bytes
},
}
    // Docker has no per-container execution deadline, so the function's
    // timeout is not enforced here; see executeWithTimeout above for a
    // context-based sketch. As a stopgap, cap CPU so a runaway function
    // cannot monopolize the host.
    if function.Timeout.Duration > 0 {
        hostConfig.Resources.NanoCPUs = 1000000000 // 1 CPU
    }
resp, err := s.client.ContainerCreate(ctx, config, hostConfig, nil, nil, "")
if err != nil {
return "", fmt.Errorf("failed to create container: %w", err)
}
return resp.ID, nil
}
func (s *SimpleDockerRuntime) getContainerLogs(ctx context.Context, containerID string) ([]string, error) {
    // Fetch the last 50 lines of stdout/stderr
    logs, err := s.client.ContainerLogs(ctx, containerID, container.LogsOptions{
        ShowStdout: true,
        ShowStderr: true,
        Tail:       "50",
    })
    if err != nil {
        return nil, fmt.Errorf("failed to get container logs: %w", err)
    }
    defer logs.Close()
    // The log stream is multiplexed (8-byte frame headers); stdcopy
    // demultiplexes it into plain text
    var stdout, stderr strings.Builder
    if _, err := stdcopy.StdCopy(&stdout, &stderr, logs); err != nil {
        return nil, fmt.Errorf("failed to read container logs: %w", err)
    }
    combined := strings.TrimSpace(stdout.String() + stderr.String())
    if combined == "" {
        return []string{}, nil
    }
    return strings.Split(combined, "\n"), nil
}
func (s *SimpleDockerRuntime) getContainerOutput(ctx context.Context, containerID string) (json.RawMessage, error) {
    // Treat the container's stdout as the function's output
    logs, err := s.client.ContainerLogs(ctx, containerID, container.LogsOptions{
        ShowStdout: true,
        ShowStderr: true,
        Tail:       "100",
    })
    if err != nil {
        return nil, fmt.Errorf("failed to get container logs: %w", err)
    }
    defer logs.Close()
    // The stream interleaves 8-byte frame headers with payload, and frames
    // do not align with read boundaries, so demultiplex with stdcopy rather
    // than skipping bytes by hand
    var stdout, stderr strings.Builder
    if _, err := stdcopy.StdCopy(&stdout, &stderr, logs); err != nil {
        return nil, fmt.Errorf("failed to read container output: %w", err)
    }
    logContent := strings.TrimSpace(stdout.String())
    // If the function printed valid JSON, pass it through untouched;
    // otherwise wrap the raw text in a JSON envelope
    if logContent != "" && json.Valid([]byte(logContent)) {
        return json.RawMessage(logContent), nil
    }
    result := map[string]interface{}{
        "result":    "Function executed successfully",
        "output":    logContent,
        "timestamp": time.Now().UTC(),
    }
    resultJSON, _ := json.Marshal(result)
    return json.RawMessage(resultJSON), nil
}
func (s *SimpleDockerRuntime) cleanupContainer(ctx context.Context, containerID string) {
// Remove container
if err := s.client.ContainerRemove(ctx, containerID, container.RemoveOptions{
Force: true,
}); err != nil {
s.logger.Warn("Failed to remove container",
zap.String("container_id", containerID),
zap.Error(err))
}
}
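
// Note: if the caller's ctx is already canceled (for example after a
// timeout), the removal above fails and the container leaks. One fix,
// sketched here assuming Go 1.21+ for context.WithoutCancel, is to detach
// the cleanup from the caller's cancellation:
//
//	cleanupCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 10*time.Second)
//	defer cancel()
//	_ = s.client.ContainerRemove(cleanupCtx, containerID, container.RemoveOptions{Force: true})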