Skip to content

Commit a08b862

Browse files
tyrchenclaude
andauthored
fix(health): remove TCP half-close that breaks Docker health check (#2)
## Summary - **Root cause**: The health check client called `writer.shutdown()` after sending the HTTP request, which sends a TCP FIN. Hyper interprets the FIN as the connection closing prematurely and aborts before sending the response — producing `"connection closed before message completed"` errors every 2s and marking the container unhealthy. - **Fix**: Remove the stream split and half-close. The GET request is self-framing and the `Connection: close` header tells hyper to close after responding, giving `read_to_string` its EOF naturally. - Downgrade connection-error log from `error!` to `warn!` since client resets are normal. Closes #1 ## Test plan - [x] `cargo build` / `cargo test` / `cargo clippy` pass - [x] Run `docker compose up` with the image rebuilt from this branch and verify container shows `(healthy)` - [x] Confirm no `"connection closed before message completed"` errors in logs 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b1d1e9f commit a08b862

File tree

4 files changed

+120
-9
lines changed

4 files changed

+120
-9
lines changed

.github/workflows/s3-test.yml

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
SSM_SKIP_SIGNATURE_VALIDATION=true \
3232
GATEWAY_LISTEN=0.0.0.0:4566 \
3333
LOG_LEVEL=warn \
34-
cargo run --release -p ruststack-server &
34+
cargo run --release -p ruststack-server > /tmp/ruststack.log 2>&1 &
3535
3636
for i in $(seq 1 30); do
3737
if curl -sf http://127.0.0.1:4566/_localstack/health > /dev/null 2>&1; then
@@ -52,6 +52,25 @@ jobs:
5252
echo "AWS_ACCESS_KEY_ID=test" >> "$GITHUB_ENV"
5353
echo "AWS_SECRET_ACCESS_KEY=test" >> "$GITHUB_ENV"
5454
55+
# ── Health Check Verification ────────────────────────────────────
56+
- name: "Health Check: verify --health-check flag and no connection errors"
57+
run: |
58+
set -euo pipefail
59+
60+
# Run the built-in health check multiple times
61+
for i in $(seq 1 5); do
62+
./target/release/ruststack-server --health-check
63+
done
64+
65+
# Verify the server log has no "connection closed before message completed" errors
66+
if grep -q "connection closed before message completed" /tmp/ruststack.log; then
67+
echo "::error::Server logged 'connection closed before message completed' — health check is broken"
68+
cat /tmp/ruststack.log
69+
exit 1
70+
fi
71+
72+
echo "Health check passed 5 times with no connection errors"
73+
5574
# ── Bucket Operations ───────────────────────────────────────────
5675
- name: "Bucket Operations: create, head, list, locate, delete"
5776
run: |

apps/ruststack-server/src/main.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ use anyhow::{Context, Result};
3939
use hyper_util::rt::{TokioExecutor, TokioIo};
4040
use hyper_util::server::conn::auto::Builder as HttpConnBuilder;
4141
use tokio::net::TcpListener;
42-
use tracing::{error, info, warn};
42+
use tracing::{info, warn};
4343
use tracing_subscriber::EnvFilter;
4444

4545
#[cfg(feature = "dynamodb")]
@@ -252,7 +252,7 @@ async fn serve(listener: TcpListener, service: GatewayService) -> Result<()> {
252252

253253
tokio::spawn(async move {
254254
if let Err(e) = conn.await {
255-
error!(peer_addr = %peer_addr, error = %e, "connection error");
255+
warn!(peer_addr = %peer_addr, error = %e, "connection error");
256256
}
257257
});
258258
}
@@ -333,19 +333,20 @@ async fn run_health_check(addr: &str) -> Result<()> {
333333
use tokio::io::{AsyncReadExt, AsyncWriteExt};
334334
use tokio::net::TcpStream;
335335

336-
let stream = TcpStream::connect(addr)
336+
let mut stream = TcpStream::connect(addr)
337337
.await
338338
.with_context(|| format!("cannot connect to {addr}"))?;
339339

340-
let (mut reader, mut writer) = stream.into_split();
341-
342340
let request =
343341
format!("GET /_localstack/health HTTP/1.1\r\nHost: {addr}\r\nConnection: close\r\n\r\n");
344-
writer.write_all(request.as_bytes()).await?;
345-
writer.shutdown().await?;
342+
stream.write_all(request.as_bytes()).await?;
343+
// Do not half-close the write side: the HTTP request is self-framing
344+
// (GET with no body) and the Connection: close header tells hyper not
345+
// to expect further requests. Hyper will close after responding,
346+
// which gives read_to_string its EOF.
346347

347348
let mut response = String::new();
348-
reader.read_to_string(&mut response).await?;
349+
stream.read_to_string(&mut response).await?;
349350

350351
// Accept any 200 response that reports at least one running service.
351352
if response.contains("200 OK") && response.contains("\"running\"") {

tests/integration/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ mod test_bucket;
238238
mod test_cors;
239239
mod test_dynamodb;
240240
mod test_error;
241+
mod test_health;
241242
mod test_lambda;
242243
mod test_list;
243244
mod test_multipart;
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
//! Health check integration tests.
2+
//!
3+
//! Verifies that the `/_localstack/health` endpoint works correctly and that
4+
//! raw TCP health checks (as used by Docker `HEALTHCHECK`) do not produce
5+
//! "connection closed before message completed" errors.
6+
7+
#[cfg(test)]
8+
mod tests {
9+
use tokio::io::{AsyncReadExt, AsyncWriteExt};
10+
use tokio::net::TcpStream;
11+
12+
use crate::endpoint_url;
13+
14+
/// Extract host:port from the endpoint URL.
15+
fn endpoint_addr() -> String {
16+
let url = endpoint_url();
17+
url.strip_prefix("http://").unwrap_or(&url).to_owned()
18+
}
19+
20+
/// Simulate the exact same TCP-level health check that the Docker
21+
/// HEALTHCHECK uses (`ruststack-server --health-check`). The fix for
22+
/// issue #1 removed the `writer.shutdown()` call; this test verifies
23+
/// the server responds correctly without a TCP half-close.
24+
#[tokio::test]
25+
#[ignore = "requires running server"]
26+
async fn test_should_respond_to_raw_tcp_health_check() {
27+
let addr = endpoint_addr();
28+
29+
let mut stream = TcpStream::connect(&addr)
30+
.await
31+
.unwrap_or_else(|e| panic!("cannot connect to {addr}: {e}"));
32+
33+
let request = format!(
34+
"GET /_localstack/health HTTP/1.1\r\nHost: {addr}\r\nConnection: close\r\n\r\n",
35+
);
36+
stream
37+
.write_all(request.as_bytes())
38+
.await
39+
.expect("failed to write request");
40+
// Intentionally NOT calling stream.shutdown() — this is the fix.
41+
42+
let mut response = String::new();
43+
stream
44+
.read_to_string(&mut response)
45+
.await
46+
.expect("failed to read response");
47+
48+
assert!(
49+
response.contains("200 OK"),
50+
"expected 200 OK in response, got: {response}",
51+
);
52+
assert!(
53+
response.contains("\"running\""),
54+
"expected 'running' in response, got: {response}",
55+
);
56+
}
57+
58+
/// Run multiple sequential health checks to ensure the server does not
59+
/// accumulate connection errors over time.
60+
#[tokio::test]
61+
#[ignore = "requires running server"]
62+
async fn test_should_handle_repeated_health_checks() {
63+
let addr = endpoint_addr();
64+
65+
for i in 0..10 {
66+
let mut stream = TcpStream::connect(&addr)
67+
.await
68+
.unwrap_or_else(|e| panic!("connect #{i} to {addr} failed: {e}"));
69+
70+
let request = format!(
71+
"GET /_localstack/health HTTP/1.1\r\nHost: {addr}\r\nConnection: close\r\n\r\n",
72+
);
73+
stream
74+
.write_all(request.as_bytes())
75+
.await
76+
.unwrap_or_else(|e| panic!("write #{i} failed: {e}"));
77+
78+
let mut response = String::new();
79+
stream
80+
.read_to_string(&mut response)
81+
.await
82+
.unwrap_or_else(|e| panic!("read #{i} failed: {e}"));
83+
84+
assert!(
85+
response.contains("200 OK"),
86+
"health check #{i} failed: {response}",
87+
);
88+
}
89+
}
90+
}

0 commit comments

Comments
 (0)