[Debuginfod] Don't depend on Content-Length.

The present implementation of debuginfod lookups requires the
Content-Length field to be populated in the HTTP server response.
Unfortunately, Content-Length is optional, and there are some real
scenarios where it's missing. (For example, a Google Cloud Storage
server doing on-the-fly gunzipping.)

This changes the debuginfod response handler to directly stream the
output to the cache file as it is received. In addition to allowing
lookups to proceed without a Content-Lenght, it seems somewhat more
straightforward to implement, and it allows the disk I/O to be
interleaved with the network I/O.

Reviewed By: noajshu

Differential Revision: https://reviews.llvm.org/D121720
This commit is contained in:
Daniel Thornburgh
2022-03-11 01:51:13 +00:00
parent 8692e27ad6
commit 7917b3c695
5 changed files with 59 additions and 229 deletions

View File

@@ -7,9 +7,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the declarations of the HTTPClient, HTTPMethod,
/// HTTPResponseHandler, and BufferedHTTPResponseHandler classes, as well as
/// the HTTPResponseBuffer and HTTPRequest structs.
/// This file contains the declarations of the HTTPClient library for issuing
/// HTTP requests and handling the responses.
///
//===----------------------------------------------------------------------===//
@@ -40,43 +39,13 @@ bool operator==(const HTTPRequest &A, const HTTPRequest &B);
/// of its methods.
class HTTPResponseHandler {
public:
/// Processes one line of HTTP response headers.
virtual Error handleHeaderLine(StringRef HeaderLine) = 0;
/// Processes an additional chunk of bytes of the HTTP response body.
virtual Error handleBodyChunk(StringRef BodyChunk) = 0;
/// Processes the HTTP response status code.
virtual Error handleStatusCode(unsigned Code) = 0;
protected:
~HTTPResponseHandler();
};
/// An HTTP response status code bundled with a buffer to store the body.
struct HTTPResponseBuffer {
unsigned Code = 0;
std::unique_ptr<WritableMemoryBuffer> Body;
};
/// A simple handler which writes returned data to an HTTPResponseBuffer.
/// Ignores all headers except the Content-Length, which it uses to
/// allocate an appropriately-sized Body buffer.
class BufferedHTTPResponseHandler final : public HTTPResponseHandler {
size_t Offset = 0;
public:
/// Stores the data received from the HTTP server.
HTTPResponseBuffer ResponseBuffer;
/// These callbacks store the body and status code in an HTTPResponseBuffer
/// allocated based on Content-Length. The Content-Length header must be
/// handled by handleHeaderLine before any calls to handleBodyChunk.
Error handleHeaderLine(StringRef HeaderLine) override;
Error handleBodyChunk(StringRef BodyChunk) override;
Error handleStatusCode(unsigned Code) override;
};
/// A reusable client that can perform HTTPRequests through a network socket.
class HTTPClient {
#ifdef LLVM_ENABLE_CURL
@@ -107,13 +76,8 @@ public:
/// Handler method.
Error perform(const HTTPRequest &Request, HTTPResponseHandler &Handler);
/// Performs the Request with the default BufferedHTTPResponseHandler, and
/// returns its HTTPResponseBuffer or an Error.
Expected<HTTPResponseBuffer> perform(const HTTPRequest &Request);
/// Performs an HTTPRequest with the default configuration to make a GET
/// request to the given Url. Returns an HTTPResponseBuffer or an Error.
Expected<HTTPResponseBuffer> get(StringRef Url);
/// Returns the last received response code or zero if none.
unsigned responseCode();
};
} // end namespace llvm

View File

@@ -115,6 +115,40 @@ Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
getDefaultDebuginfodTimeout());
}
namespace {
/// A simple handler which streams the returned data to a cache file. The cache
/// file is only created if a 200 OK status is observed.
class StreamedHTTPResponseHandler : public HTTPResponseHandler {
using CreateStreamFn =
std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
CreateStreamFn CreateStream;
HTTPClient &Client;
std::unique_ptr<CachedFileStream> FileStream;
public:
StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
: CreateStream(CreateStream), Client(Client) {}
Error handleBodyChunk(StringRef BodyChunk) override;
};
} // namespace
Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
if (!FileStream) {
if (Client.responseCode() != 200)
return Error::success();
Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
CreateStream();
if (!FileStreamOrError)
return FileStreamOrError.takeError();
FileStream = std::move(*FileStreamOrError);
}
*FileStream->OS << BodyChunk;
return Error::success();
}
Expected<std::string> getCachedOrDownloadArtifact(
StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
@@ -155,28 +189,18 @@ Expected<std::string> getCachedOrDownloadArtifact(
SmallString<64> ArtifactUrl;
sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
Expected<HTTPResponseBuffer> ResponseOrErr = Client.get(ArtifactUrl);
if (!ResponseOrErr)
return ResponseOrErr.takeError();
// Perform the HTTP request and if successful, write the response body to
// the cache.
StreamedHTTPResponseHandler Handler([&]() { return CacheAddStream(Task); },
Client);
HTTPRequest Request(ArtifactUrl);
Error Err = Client.perform(Request, Handler);
if (Err)
return Err;
HTTPResponseBuffer &Response = *ResponseOrErr;
if (Response.Code != 200)
if (Client.responseCode() != 200)
continue;
// We have retrieved the artifact from this server, and now add it to the
// file cache.
Expected<std::unique_ptr<CachedFileStream>> FileStreamOrErr =
CacheAddStream(Task);
if (!FileStreamOrErr)
return FileStreamOrErr.takeError();
std::unique_ptr<CachedFileStream> &FileStream = *FileStreamOrErr;
if (!Response.Body)
return createStringError(
errc::io_error, "Unallocated MemoryBuffer in HTTPResponseBuffer.");
*FileStream->OS << StringRef(Response.Body->getBufferStart(),
Response.Body->getBufferSize());
// Return the path to the artifact on disk.
return std::string(AbsCachedArtifactPath);
}

View File

@@ -7,9 +7,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This file defines the methods of the HTTPRequest, HTTPClient, and
/// BufferedHTTPResponseHandler classes.
/// This file defines the implementation of the HTTPClient library for issuing
/// HTTP requests and handling the responses.
///
//===----------------------------------------------------------------------===//
@@ -34,44 +33,6 @@ bool operator==(const HTTPRequest &A, const HTTPRequest &B) {
HTTPResponseHandler::~HTTPResponseHandler() = default;
static inline bool parseContentLengthHeader(StringRef LineRef,
size_t &ContentLength) {
// Content-Length is a mandatory header, and the only one we handle.
return LineRef.consume_front("Content-Length: ") &&
to_integer(LineRef.trim(), ContentLength, 10);
}
Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) {
if (ResponseBuffer.Body)
return Error::success();
size_t ContentLength;
if (parseContentLengthHeader(HeaderLine, ContentLength))
ResponseBuffer.Body =
WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength);
return Error::success();
}
Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
if (!ResponseBuffer.Body)
return createStringError(errc::io_error,
"Unallocated response buffer. HTTP Body data "
"received before Content-Length header.");
if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize())
return createStringError(errc::io_error,
"Content size exceeds buffer size.");
memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(),
BodyChunk.size());
Offset += BodyChunk.size();
return Error::success();
}
Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) {
ResponseBuffer.Code = Code;
return Error::success();
}
bool HTTPClient::IsInitialized = false;
class HTTPClientCleanup {
@@ -80,18 +41,6 @@ public:
};
static const HTTPClientCleanup Cleanup;
Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) {
BufferedHTTPResponseHandler Handler;
if (Error Err = perform(Request, Handler))
return std::move(Err);
return std::move(Handler.ResponseBuffer);
}
Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) {
HTTPRequest Request(Url);
return perform(Request);
}
#ifdef LLVM_ENABLE_CURL
bool HTTPClient::isAvailable() { return true; }
@@ -128,18 +77,6 @@ struct CurlHTTPRequest {
llvm::Error ErrorState = Error::success();
};
static size_t curlHeaderFunction(char *Contents, size_t Size, size_t NMemb,
CurlHTTPRequest *CurlRequest) {
assert(Size == 1 && "The Size passed by libCURL to CURLOPT_HEADERFUNCTION "
"should always be 1.");
if (Error Err =
CurlRequest->Handler.handleHeaderLine(StringRef(Contents, NMemb))) {
CurlRequest->storeError(std::move(Err));
return 0;
}
return NMemb;
}
static size_t curlWriteFunction(char *Contents, size_t Size, size_t NMemb,
CurlHTTPRequest *CurlRequest) {
Size *= NMemb;
@@ -160,7 +97,6 @@ HTTPClient::HTTPClient() {
assert(Curl && "Curl could not be initialized");
// Set the callback hooks.
curl_easy_setopt(Curl, CURLOPT_WRITEFUNCTION, curlWriteFunction);
curl_easy_setopt(Curl, CURLOPT_HEADERFUNCTION, curlHeaderFunction);
}
HTTPClient::~HTTPClient() { curl_easy_cleanup(Curl); }
@@ -177,24 +113,21 @@ Error HTTPClient::perform(const HTTPRequest &Request,
CurlHTTPRequest CurlRequest(Handler);
curl_easy_setopt(Curl, CURLOPT_WRITEDATA, &CurlRequest);
curl_easy_setopt(Curl, CURLOPT_HEADERDATA, &CurlRequest);
CURLcode CurlRes = curl_easy_perform(Curl);
if (CurlRes != CURLE_OK)
return joinErrors(std::move(CurlRequest.ErrorState),
createStringError(errc::io_error,
"curl_easy_perform() failed: %s\n",
curl_easy_strerror(CurlRes)));
if (CurlRequest.ErrorState)
return std::move(CurlRequest.ErrorState);
unsigned Code;
curl_easy_getinfo(Curl, CURLINFO_RESPONSE_CODE, &Code);
if (Error Err = Handler.handleStatusCode(Code))
return joinErrors(std::move(CurlRequest.ErrorState), std::move(Err));
return std::move(CurlRequest.ErrorState);
}
unsigned HTTPClient::responseCode() {
long Code = 0;
curl_easy_getinfo(Curl, CURLINFO_RESPONSE_CODE, &Code);
return Code;
}
#else
HTTPClient::HTTPClient() = default;
@@ -214,4 +147,8 @@ Error HTTPClient::perform(const HTTPRequest &Request,
llvm_unreachable("No HTTP Client implementation available.");
}
unsigned HTTPClient::responseCode() {
llvm_unreachable("No HTTP Client implementation available.");
}
#endif

View File

@@ -1,5 +1,4 @@
add_llvm_unittest(DebuginfodTests
HTTPClientTests.cpp
DebuginfodTests.cpp
)

View File

@@ -1,94 +0,0 @@
//===-- llvm/unittest/Debuginfod/HTTPClientTests.cpp - unit tests ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Debuginfod/HTTPClient.h"
#include "llvm/Support/Errc.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
using namespace llvm;
TEST(BufferedHTTPResponseHandler, Lifecycle) {
BufferedHTTPResponseHandler Handler;
EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: 36\r\n"),
Succeeded());
EXPECT_THAT_ERROR(Handler.handleBodyChunk("body:"), Succeeded());
EXPECT_THAT_ERROR(Handler.handleBodyChunk("this puts the total at 36 chars"),
Succeeded());
EXPECT_EQ(Handler.ResponseBuffer.Body->MemoryBuffer::getBuffer(),
"body:this puts the total at 36 chars");
// Additional content should be rejected by the handler.
EXPECT_THAT_ERROR(
Handler.handleBodyChunk("extra content past the content-length"),
Failed<llvm::StringError>());
// Test response code is set.
EXPECT_THAT_ERROR(Handler.handleStatusCode(200u), Succeeded());
EXPECT_EQ(Handler.ResponseBuffer.Code, 200u);
EXPECT_THAT_ERROR(Handler.handleStatusCode(400u), Succeeded());
EXPECT_EQ(Handler.ResponseBuffer.Code, 400u);
}
TEST(BufferedHTTPResponseHandler, NoContentLengthLifecycle) {
BufferedHTTPResponseHandler Handler;
EXPECT_EQ(Handler.ResponseBuffer.Code, 0u);
EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
// A body chunk passed before the content-length header is an error.
EXPECT_THAT_ERROR(Handler.handleBodyChunk("body"),
Failed<llvm::StringError>());
EXPECT_THAT_ERROR(Handler.handleHeaderLine("a header line"), Succeeded());
EXPECT_THAT_ERROR(Handler.handleBodyChunk("body"),
Failed<llvm::StringError>());
}
TEST(BufferedHTTPResponseHandler, ZeroContentLength) {
BufferedHTTPResponseHandler Handler;
EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: 0"), Succeeded());
EXPECT_NE(Handler.ResponseBuffer.Body, nullptr);
EXPECT_EQ(Handler.ResponseBuffer.Body->getBufferSize(), 0u);
// All content should be rejected by the handler.
EXPECT_THAT_ERROR(Handler.handleBodyChunk("non-empty body content"),
Failed<llvm::StringError>());
}
TEST(BufferedHTTPResponseHandler, MalformedContentLength) {
// Check that several invalid content lengths are ignored.
BufferedHTTPResponseHandler Handler;
EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: fff"),
Succeeded());
EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: "),
Succeeded());
EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
using namespace std::string_literals;
EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: \0\0\0"s),
Succeeded());
EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
EXPECT_THAT_ERROR(Handler.handleHeaderLine("Content-Length: -11"),
Succeeded());
EXPECT_EQ(Handler.ResponseBuffer.Body, nullptr);
// All content should be rejected by the handler because no valid
// Content-Length header has been received.
EXPECT_THAT_ERROR(Handler.handleBodyChunk("non-empty body content"),
Failed<llvm::StringError>());
}
#ifdef LLVM_ENABLE_CURL
TEST(HTTPClient, isAvailable) { EXPECT_TRUE(HTTPClient::isAvailable()); }
#endif