Commit 62b83fe

chunked logs
1 parent 2b9b799 commit 62b83fe

6 files changed: +153, -65 lines

sandbox-sidecar/src/runners/e2bRunner.ts

Lines changed: 0 additions & 2 deletions
@@ -68,8 +68,6 @@ export class E2BSandboxRunner implements SandboxRunner {
       sandbox,
       workDir,
       ["show", "-json", "tfplan.binary"],
-      logs,
-      streamLog,
     );
 
     const planJSON = showResult.stdout;

taco/internal/tfe/apply.go

Lines changed: 35 additions & 12 deletions
@@ -101,16 +101,29 @@ func (h *TfeHandler) GetApplyLogs(c echo.Context) error {
         return c.JSON(http.StatusNotFound, map[string]string{"error": "apply not found"})
     }
 
-    // Read apply logs from chunked S3 objects
+    // Read apply logs from chunked S3 objects (fixed 2KB chunks)
     // Chunks are stored as applies/{applyID}/chunks/00000001.log, 00000002.log, etc.
+    const chunkSize = 2 * 1024 // Must match writer's chunk size
+
+    // Determine which chunk contains the requested offset to avoid re-downloading
+    // data the client already has.
+    startChunk := 1
+    if offsetInt > 1 { // offset includes STX byte at offset 0
+        logOffset := offsetInt - 1
+        startChunk = int(logOffset/chunkSize) + 1
+    }
+
+    // Number of bytes before the first chunk we fetch (used to map offsets)
+    bytesBefore := int64(chunkSize * (startChunk - 1))
+
     var logText string
-    chunkIndex := 1
+    chunkIndex := startChunk
     var fullLogs strings.Builder
 
     for {
         chunkKey := fmt.Sprintf("applies/%s/chunks/%08d.log", run.ID, chunkIndex)
         logData, err := h.blobStore.DownloadBlob(ctx, chunkKey)
 
         if err != nil {
             // Chunk doesn't exist - check if apply is still running
             if run.Status == "applied" || run.Status == "errored" {
@@ -120,15 +133,19 @@ func (h *TfeHandler) GetApplyLogs(c echo.Context) error {
             // Apply still running, this chunk doesn't exist yet
             break
         }
 
+        // Keep chunks at full 2048 bytes (don't trim nulls) for correct offset math
         fullLogs.Write(logData)
         chunkIndex++
     }
 
     logText = fullLogs.String()
 
+    // NOW trim all null bytes from the result (after offset calculation is done)
+    logText = strings.TrimRight(logText, "\x00")
+
     // If no chunks exist yet, generate default message based on status
-    if logText == "" {
+    if logText == "" && offsetInt == 0 {
         if run.Status == "applying" || run.Status == "apply_queued" {
             logText = "Waiting for apply to start...\n"
         } else {
@@ -146,16 +163,22 @@ func (h *TfeHandler) GetApplyLogs(c echo.Context) error {
         fmt.Printf("📤 APPLY LOGS at offset=0: STX + %d bytes of log text\n", len(logText))
     } else {
         // Client already received STX (1 byte at offset 0)
-        // Map stream offset to logText offset: streamOffset=1 → logText[0]
-        logOffset := offsetInt - 1
+        // Map stream offset to logText offset:
+        // - stream offset 0 = STX
+        // - stream offset 1 = first byte of full logs
+        // We only fetched chunks starting at startChunk, so subtract bytesBefore.
+        logOffset := offsetInt - 1 - bytesBefore
+        if logOffset < 0 {
+            logOffset = 0
+        }
 
         if logOffset < int64(len(logText)) {
             // Send remaining log text
             responseData = []byte(logText[logOffset:])
             fmt.Printf("📤 APPLY LOGS at offset=%d: sending %d bytes (logText[%d:])\n",
                 offsetInt, len(responseData), logOffset)
-        } else if logOffset == int64(len(logText)) && run.Status == "applied" {
-            // All logs sent, send ETX
+        } else if run.Status == "applied" || run.Status == "errored" {
+            // All logs sent (or client offset beyond end), send ETX
             responseData = []byte{0x03}
             fmt.Printf("📤 Sending ETX (End of Text) for apply %s - logs complete\n", applyID)
         } else {
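
To make the new offset math concrete, here is a minimal standalone sketch of the chunk-selection arithmetic. The chunkForOffset helper and the example numbers are illustrative, not part of the commit; offsetInt and chunkSize follow the names used in the diff:

package main

import "fmt"

const chunkSize = 2 * 1024 // 2KB, matching the writer's fixed chunk size

// chunkForOffset mirrors the reader's arithmetic: stream offset 0 is the STX
// byte, so log byte N lives at stream offset N+1, and chunks are numbered
// starting at 1.
func chunkForOffset(offsetInt int64) (startChunk int, bytesBefore int64) {
    startChunk = 1
    if offsetInt > 1 { // offset 0 is STX, offset 1 is the first log byte
        logOffset := offsetInt - 1
        startChunk = int(logOffset/chunkSize) + 1
    }
    bytesBefore = int64(chunkSize * (startChunk - 1))
    return startChunk, bytesBefore
}

func main() {
    // A client that has consumed STX plus 4096 log bytes polls at offset 4097:
    // logOffset = 4096, startChunk = 4096/2048 + 1 = 3, bytesBefore = 4096.
    // Chunks 1 and 2 (bytes the client already has) are never re-downloaded.
    fmt.Println(chunkForOffset(4097)) // 3 4096
}

Fixed-size chunks are what make this lookup constant-time: with variable-length chunks the reader would have to download every chunk from the start just to locate the byte the client asked for.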

taco/internal/tfe/apply_executor.go

Lines changed: 36 additions & 11 deletions
@@ -149,34 +149,57 @@ func (e *ApplyExecutor) ExecuteApply(ctx context.Context, runID string) error {
 
     // Chunked logging to prevent memory bloat
     // Upload log chunks as separate S3 objects and clear buffer after each upload
+    // Fixed-size 2KB chunks enable offset-based chunk selection (reduces S3 re-downloads)
+    const chunkSize = 2 * 1024 // 2KB fixed size
     chunkIndex := 1
     var logBuffer bytes.Buffer
     var logMutex sync.Mutex
     lastLogFlush := time.Now()
 
-    // Flush helper - uploads current buffer as a chunk and clears it
+    // Flush helper - uploads current buffer as a padded 2KB chunk and clears it
     flushLogs := func() error {
        logMutex.Lock()
        if logBuffer.Len() == 0 {
            logMutex.Unlock()
            return nil
        }
-        // Copy buffer to avoid holding lock during upload
-        data := make([]byte, logBuffer.Len())
-        copy(data, logBuffer.Bytes())
+
+        // Extract at most chunkSize bytes (2KB)
+        dataLen := logBuffer.Len()
+        if dataLen > chunkSize {
+            dataLen = chunkSize
+        }
+        data := make([]byte, dataLen)
+        copy(data, logBuffer.Bytes()[:dataLen])
         currentChunk := chunkIndex
+        chunkIndex++ // Increment NOW before unlock to reserve this chunk number atomically
+
+        // Copy remainder BEFORE resetting (crucial - remainder slice points to internal buffer)
+        var remainderCopy []byte
+        if logBuffer.Len() > dataLen {
+            remainder := logBuffer.Bytes()[dataLen:]
+            remainderCopy = make([]byte, len(remainder))
+            copy(remainderCopy, remainder)
+        }
+
+        // Now safe to reset and write remainder back
+        logBuffer.Reset()
+        if len(remainderCopy) > 0 {
+            logBuffer.Write(remainderCopy)
+        }
         logMutex.Unlock()
 
+        // Pad to fixed 2KB size (rest will be null bytes)
+        paddedData := make([]byte, chunkSize)
+        copy(paddedData, data)
+
         // Upload this chunk (key includes zero-padded chunk index)
         chunkKey := fmt.Sprintf("applies/%s/chunks/%08d.log", run.ID, currentChunk)
-        err := e.blobStore.UploadBlob(ctx, chunkKey, data)
+        err := e.blobStore.UploadBlob(ctx, chunkKey, paddedData)
 
         if err == nil {
            logMutex.Lock()
            lastLogFlush = time.Now()
-            // Clear buffer to free memory
-            logBuffer.Reset()
-            chunkIndex++
            logMutex.Unlock()
         }
         return err
@@ -186,8 +209,8 @@ func (e *ApplyExecutor) ExecuteApply(ctx context.Context, runID string) error {
        logMutex.Lock()
        logBuffer.WriteString(message)
        now := time.Now()
-        // Flush if buffer exceeds chunk size (256KB) or 1s has passed
-        shouldFlush := logBuffer.Len() > 256*1024 || now.Sub(lastLogFlush) > 1*time.Second
+        // Flush if buffer exceeds 2KB or 1s has passed
+        shouldFlush := logBuffer.Len() > chunkSize || now.Sub(lastLogFlush) > 1*time.Second
        logMutex.Unlock()
 
        if shouldFlush {
@@ -334,7 +357,9 @@ func (e *ApplyExecutor) ExecuteApply(ctx context.Context, runID string) error {
     }
 
     // Append the actual terraform output to the progress logs
-    appendLog("\n" + logs)
+    if !useSandbox {
+        appendLog("\n" + logs)
+    }
 
     // Store final status
     if applyErr != nil {
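
The copy-before-Reset sequence above is the subtle part of this change: bytes.Buffer.Bytes() returns a slice that aliases the buffer's internal storage, so the remainder must be copied out before Reset, or the write-back would read memory the buffer is about to reuse. A minimal sketch of the same pattern in isolation (flush and uploadChunk are hypothetical stand-ins for flushLogs and the S3 upload):

package main

import (
    "bytes"
    "fmt"
    "sync"
)

const chunkSize = 2 * 1024

func flush(buf *bytes.Buffer, mu *sync.Mutex, chunkIndex *int,
    uploadChunk func(index int, data []byte) error) error {
    mu.Lock()
    if buf.Len() == 0 {
        mu.Unlock()
        return nil
    }

    // Take at most one chunk's worth of bytes.
    dataLen := buf.Len()
    if dataLen > chunkSize {
        dataLen = chunkSize
    }
    data := make([]byte, dataLen)
    copy(data, buf.Bytes()[:dataLen])

    current := *chunkIndex
    (*chunkIndex)++ // reserve this chunk number while still holding the lock

    // Copy the remainder before Reset; buf.Bytes() aliases internal storage.
    var remainder []byte
    if buf.Len() > dataLen {
        remainder = append(remainder, buf.Bytes()[dataLen:]...)
    }
    buf.Reset()
    buf.Write(remainder)
    mu.Unlock()

    // Zero-pad to the fixed chunk size; the reader trims the padding later.
    padded := make([]byte, chunkSize)
    copy(padded, data)
    return uploadChunk(current, padded)
}

func main() {
    var buf bytes.Buffer
    var mu sync.Mutex
    idx := 1
    buf.WriteString("terraform apply output...\n")
    _ = flush(&buf, &mu, &idx, func(index int, data []byte) error {
        fmt.Printf("chunk %08d: %d bytes\n", index, len(data))
        return nil
    })
}

The cost of the fixed size is write amplification: a chunk holding a single log line is still uploaded at the full 2048 bytes, which is why the readers keep chunks padded while doing offset math and strip the trailing \x00 bytes only at the end.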

taco/internal/tfe/plan.go

Lines changed: 46 additions & 24 deletions
@@ -64,7 +64,7 @@ func (h *TfeHandler) GetPlan(c echo.Context) error {
             ID: plan.RunID,
         },
     }
-
+
     // Only include resource counts when plan is finished
     // If we send HasChanges:false before the plan completes, Terraform CLI
     // will think there's nothing to apply and won't prompt for confirmation!
@@ -105,16 +105,29 @@ func (h *TfeHandler) GetPlanLogs(c echo.Context) error {
         return c.JSON(http.StatusNotFound, map[string]string{"error": "plan not found"})
     }
 
-    // Read logs from chunked S3 objects
+    // Read logs from chunked S3 objects (fixed 2KB chunks)
     // Chunks are stored as plans/{planID}/chunks/00000001.log, 00000002.log, etc.
+    const chunkSize = 2 * 1024 // Must match writer's chunk size
+
+    // Determine which chunk contains the requested offset to avoid re-downloading
+    // data the client already has.
+    startChunk := 1
+    if offsetInt > 1 { // offset includes STX byte at offset 0
+        logOffset := offsetInt - 1
+        startChunk = int(logOffset/chunkSize) + 1
+    }
+
+    // Number of bytes before the first chunk we fetch (used to map offsets)
+    bytesBefore := int64(chunkSize * (startChunk - 1))
+
     var logText string
-    chunkIndex := 1
+    chunkIndex := startChunk
     var fullLogs strings.Builder
 
     for {
         chunkKey := fmt.Sprintf("plans/%s/chunks/%08d.log", planID, chunkIndex)
         logData, err := h.blobStore.DownloadBlob(ctx, chunkKey)
 
         if err != nil {
             // Chunk doesn't exist - check if plan is still running
             if plan.Status == "finished" || plan.Status == "errored" {
@@ -124,22 +137,26 @@ func (h *TfeHandler) GetPlanLogs(c echo.Context) error {
             // Plan still running, this chunk doesn't exist yet
             break
         }
 
+        // Keep chunks at full 2048 bytes (don't trim nulls) for correct offset math
         fullLogs.Write(logData)
         chunkIndex++
     }
 
     logText = fullLogs.String()
-
-    // If no chunks exist yet, generate default logs based on status
-    if logText == "" {
+
+    // NOW trim all null bytes from the result (after offset calculation is done)
+    logText = strings.TrimRight(logText, "\x00")
+
+    // If no chunks exist yet, generate default logs based on status (only on first request)
+    if logText == "" && offsetInt == 0 {
         logText = generateDefaultPlanLogs(plan)
     }
 
     // Handle offset for streaming with proper byte accounting
     // Stream format: [STX at offset 0][logText at offset 1+][ETX at offset 1+len(logText)]
     var responseData []byte
 
     if offsetInt == 0 {
         // First request: send STX + current logs
         responseData = append([]byte{0x02}, []byte(logText)...)
@@ -149,20 +166,26 @@ func (h *TfeHandler) GetPlanLogs(c echo.Context) error {
         }
     } else {
         // Client already received STX (1 byte at offset 0)
-        // Map stream offset to logText offset: streamOffset=1 → logText[0]
-        logOffset := offsetInt - 1
-
+        // Map stream offset to logText offset:
+        // - stream offset 0 = STX
+        // - stream offset 1 = first byte of full logs
+        // We only fetched chunks starting at startChunk, so subtract bytesBefore.
+        logOffset := offsetInt - 1 - bytesBefore
+        if logOffset < 0 {
+            logOffset = 0
+        }
+
         if logOffset < int64(len(logText)) {
             // Send remaining log text
             responseData = []byte(logText[logOffset:])
-            fmt.Printf("📤 PLAN LOGS at offset=%d: sending %d bytes (logText[%d:])\n",
+            fmt.Printf("📤 PLAN LOGS at offset=%d: sending %d bytes (logText[%d:])\n",
                 offsetInt, len(responseData), logOffset)
-        } else if logOffset == int64(len(logText)) && plan.Status == "finished" {
-            // All logs sent, send ETX
+        } else if plan.Status == "finished" || plan.Status == "errored" {
+            // All logs sent, send ETX to stop polling
             responseData = []byte{0x03}
             fmt.Printf("📤 Sending ETX (End of Text) for plan %s - logs complete\n", planID)
         } else {
-            // Waiting for more logs or already sent ETX
+            // Waiting for more logs
             responseData = []byte{}
             fmt.Printf("📤 PLAN LOGS at offset=%d: no new data (waiting or complete)\n", offsetInt)
         }
@@ -206,7 +229,7 @@ func (h *TfeHandler) GetPlanJSONOutput(c echo.Context) error {
     // Create dummy resource changes based on our counts
     // The CLI checks if this array has entries to decide whether to prompt
     resourceChanges := make([]interface{}, 0)
-
+
     // Add placeholder entries for additions
     for i := 0; i < plan.ResourceAdditions; i++ {
         resourceChanges = append(resourceChanges, map[string]interface{}{
@@ -215,7 +238,7 @@ func (h *TfeHandler) GetPlanJSONOutput(c echo.Context) error {
             },
         })
     }
-
+
     // Add placeholder entries for changes
     for i := 0; i < plan.ResourceChanges; i++ {
         resourceChanges = append(resourceChanges, map[string]interface{}{
@@ -224,7 +247,7 @@ func (h *TfeHandler) GetPlanJSONOutput(c echo.Context) error {
             },
         })
     }
-
+
     // Add placeholder entries for destructions
     for i := 0; i < plan.ResourceDestructions; i++ {
         resourceChanges = append(resourceChanges, map[string]interface{}{
@@ -233,7 +256,7 @@ func (h *TfeHandler) GetPlanJSONOutput(c echo.Context) error {
             },
         })
     }
-
+
     jsonPlan["resource_changes"] = resourceChanges
 }
 
@@ -246,7 +269,7 @@ func generateDefaultPlanLogs(plan *domain.TFEPlan) string {
     // Don't show resource counts in logs until plan is finished
     // Terraform CLI parses the logs to determine if changes exist!
     if plan.Status == "finished" {
-        return fmt.Sprintf(`Terraform used the selected providers to generate the following execution plan.
+        return fmt.Sprintf(`Terraform used the selected providers to generate the following execution plan.
 Resource actions are indicated with the following symbols:
   + create
   - destroy
@@ -260,4 +283,3 @@ Plan: %d to add, %d to change, %d to destroy.
     // The CLI will keep polling until it gets real content.
     return ""
 }
-
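
Both log endpoints frame their responses the same way: STX (0x02) at stream offset 0, raw log bytes from offset 1, and a lone ETX (0x03) once the run is finished, which tells the CLI to stop polling. A sketch of how a polling client would consume that framing (drain and fetch are illustrative; fetch stands in for a GET against the log URL with its offset parameter):

package main

import (
    "bytes"
    "fmt"
)

// drain polls fetch until it sees ETX, accumulating log bytes. Every byte on
// the stream, including STX and ETX, advances the offset.
func drain(fetch func(offset int64) []byte) string {
    var logs bytes.Buffer
    var offset int64
    for {
        data := fetch(offset)
        offset += int64(len(data))
        for _, b := range data {
            switch b {
            case 0x02: // STX: start-of-stream marker, not log content
            case 0x03: // ETX: run finished, stop polling
                return logs.String()
            default:
                logs.WriteByte(b)
            }
        }
        // An empty response means no new data yet; a real client would sleep.
    }
}

func main() {
    // Simulate two polls: STX plus some logs, then a lone ETX.
    responses := [][]byte{
        append([]byte{0x02}, "Plan: 1 to add, 0 to change, 0 to destroy.\n"...),
        {0x03},
    }
    i := 0
    fmt.Print(drain(func(offset int64) []byte {
        r := responses[i]
        i++
        return r
    }))
}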
