From 491395d2e388f852add78bb0ba71f62612566a81 Mon Sep 17 00:00:00 2001 From: MichalTrzeb Date: Sun, 9 Mar 2025 12:49:12 +0100 Subject: [PATCH 1/2] Fixing CcdbDownloader redirects This commit addresses: - Not following available redirects after receiving 4xx http code. - Not following all redirects provided via "Location" header. - Not following redirects after failing alien:/ or file:/ retrieval. - Improper fail-check in CcdbApi::loadLocalContentToMemory. - The headers holding etags and content-type from multiple locations. --- CCDB/include/CCDB/CCDBDownloader.h | 8 +++-- CCDB/src/CCDBDownloader.cxx | 53 +++++++++++++++++++++--------- CCDB/src/CcdbApi.cxx | 37 ++++++++++++++++++--- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/CCDB/include/CCDB/CCDBDownloader.h b/CCDB/include/CCDB/CCDBDownloader.h index 0bda186e308c6..6c057a537a096 100644 --- a/CCDB/include/CCDB/CCDBDownloader.h +++ b/CCDB/include/CCDB/CCDBDownloader.h @@ -47,6 +47,7 @@ struct HeaderObjectPair_t { typedef struct DownloaderRequestData { std::vector hosts; + std::vector locations; std::string path; long timestamp; HeaderObjectPair_t hoPair; @@ -231,12 +232,13 @@ class CCDBDownloader std::string prepareRedirectedURL(std::string address, std::string potentialHost) const; /** - * Returns a vector of possible content locations based on the redirect headers. + * Updates the locations vector with the the locations. * - * @param baseUrl Content path. * @param headerMap Map containing response headers. + * @param locations Location list to be updated. + * @param locIndex Index of the next locaiton to be tried. */ - std::vector getLocations(std::multimap* headerMap) const; + void updateLocations(std::multimap* headerMap, std::vector* locations, int* locIndex) const; std::string mUserAgentId = "CCDBDownloader"; /** diff --git a/CCDB/src/CCDBDownloader.cxx b/CCDB/src/CCDBDownloader.cxx index 3fca3c8cc2ae6..ab5acd629ea5f 100644 --- a/CCDB/src/CCDBDownloader.cxx +++ b/CCDB/src/CCDBDownloader.cxx @@ -362,7 +362,7 @@ void CCDBDownloader::tryNewHost(PerformData* performData, CURL* easy_handle) { auto requestData = performData->requestData; std::string newUrl = requestData->hosts.at(performData->hostInd) + "/" + requestData->path + "/" + std::to_string(requestData->timestamp); - LOG(debug) << "Connecting to another host " << newUrl; + LOG(debug) << "Connecting to another host " << newUrl << "\n"; requestData->hoPair.header.clear(); curl_easy_setopt(easy_handle, CURLOPT_URL, newUrl.c_str()); mHandlesToBeAdded.push_back(easy_handle); @@ -374,9 +374,11 @@ void CCDBDownloader::getLocalContent(PerformData* performData, std::string& newL LOG(debug) << "Redirecting to local content " << newLocation << "\n"; if (requestData->localContentCallback(newLocation)) { contentRetrieved = true; + LOG(debug) << "Local content retrieved succesfully: " << newLocation << " n"; } else { // Prepare next redirect url newLocation = getNewLocation(performData, locations); + LOG(debug) << "Failed to retrieve local content: " << newLocation << "\n"; } } @@ -396,7 +398,7 @@ std::string CCDBDownloader::getNewLocation(PerformData* performData, std::vector void CCDBDownloader::httpRedirect(PerformData* performData, std::string& newLocation, CURL* easy_handle) { auto requestData = performData->requestData; - LOG(debug) << "Trying content location " << newLocation; + LOG(debug) << "Trying content location " << newLocation << "\n"; curl_easy_setopt(easy_handle, CURLOPT_URL, newLocation.c_str()); mHandlesToBeAdded.push_back(easy_handle); } @@ -404,7 +406,7 @@ void CCDBDownloader::httpRedirect(PerformData* performData, std::string& newLoca void CCDBDownloader::followRedirect(PerformData* performData, CURL* easy_handle, std::vector& locations, bool& rescheduled, bool& contentRetrieved) { std::string newLocation = getNewLocation(performData, locations); - if (newLocation.find("alien:/", 0) != std::string::npos || newLocation.find("file:/", 0) != std::string::npos) { + while (!contentRetrieved && (newLocation.find("alien:/", 0) != std::string::npos || newLocation.find("file:/", 0) != std::string::npos)) { getLocalContent(performData, newLocation, contentRetrieved, locations); } if (!contentRetrieved && newLocation != "") { @@ -508,8 +510,8 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) std::string currentHost = requestData->hosts[performData->hostInd]; std::string loggingMessage = prepareLogMessage(currentHost, requestData->userAgent, requestData->path, requestData->timestamp, requestData->headers, httpCode); - // Get alternative locations for the same host - auto locations = getLocations(&(requestData->hoPair.header)); + // Get new locations based on received headers + updateLocations(&(requestData->hoPair.header), &requestData->locations, &performData->locInd); // React to received http code if (200 <= httpCode && httpCode < 400) { @@ -517,8 +519,8 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) if (304 == httpCode) { LOGP(debug, "Object exists but I am not serving it since it's already in your possession"); contentRetrieved = true; - } else if (300 <= httpCode && httpCode < 400 && performData->locInd < locations.size()) { - followRedirect(performData, easy_handle, locations, rescheduled, contentRetrieved); + } else if (300 <= httpCode && httpCode < 400 && performData->locInd < requestData->locations.size()) { + followRedirect(performData, easy_handle, requestData->locations, rescheduled, contentRetrieved); } else if (200 <= httpCode && httpCode < 300) { contentRetrieved = true; // Can be overruled by following error check } @@ -531,8 +533,16 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) contentRetrieved = false; } - // Check if content was retrieved, or scheduled to be retrieved - if (!rescheduled && !contentRetrieved && performData->locInd == locations.size()) { + // Check if content was retrieved or scheduled to be retrieved + if (!rescheduled && !contentRetrieved) { + // Current location failed without providing 3xx http code, try next redirect for the same host + if (performData->locInd < requestData->locations.size()) { + followRedirect(performData, easy_handle, requestData->locations, rescheduled, contentRetrieved); + } + } + + // Check again because content might have been retrieved or rescheduled via a redirect + if (!rescheduled && !contentRetrieved) { // Ran out of locations to redirect, try new host if (++performData->hostInd < requestData->hosts.size()) { tryNewHost(performData, easy_handle); @@ -650,24 +660,37 @@ CURLcode CCDBDownloader::perform(CURL* handle) return batchBlockingPerform(handleVector).back(); } -std::vector CCDBDownloader::getLocations(std::multimap* headerMap) const +void CCDBDownloader::updateLocations(std::multimap* headerMap, std::vector* locations, int* locIndex) const { - std::vector locs; + std::vector newLocations; + auto iter = headerMap->find("Location"); if (iter != headerMap->end()) { - locs.push_back(iter->second); + auto range = headerMap->equal_range("Location"); + for (auto it = range.first; it != range.second; ++it) { + if (std::find(locations->begin(), locations->end(), it->second) == locations->end()) { + if (std::find(newLocations.begin(), newLocations.end(), it->second) == newLocations.end()) { + newLocations.push_back(it->second); + } + } + } } + // add alternative locations (not yet included) auto iter2 = headerMap->find("Content-Location"); if (iter2 != headerMap->end()) { auto range = headerMap->equal_range("Content-Location"); for (auto it = range.first; it != range.second; ++it) { - if (std::find(locs.begin(), locs.end(), it->second) == locs.end()) { - locs.push_back(it->second); + if (std::find(locations->begin(), locations->end(), it->second) == locations->end()) { + if (std::find(newLocations.begin(), newLocations.end(), it->second) == newLocations.end()) { + newLocations.push_back(it->second); + } } } } - return locs; + + // Insert location list at the current location index. This assures that the provided locations will be tried first. + locations->insert(locations->begin() + (*locIndex), newLocations.begin(), newLocations.end()); } std::vector CCDBDownloader::batchBlockingPerform(std::vector const& handleVector) diff --git a/CCDB/src/CcdbApi.cxx b/CCDB/src/CcdbApi.cxx index df05d393100d6..9e856a4fee85b 100644 --- a/CCDB/src/CcdbApi.cxx +++ b/CCDB/src/CcdbApi.cxx @@ -667,6 +667,23 @@ size_t header_map_callback(char* buffer, size_t size, size_t nitems, void* userd } } } + + // Keep only the first ETag encountered + if (key == "ETag") { + auto cl = headers->find("ETag"); + if (cl != headers->end()) { + insert = false; + } + } + + // Keep only the first Content-Type encountered + if (key == "Content-Type") { + auto cl = headers->find("Content-Type"); + if (cl != headers->end()) { + insert = false; + } + } + if (insert) { headers->insert(std::make_pair(key, value)); } @@ -1971,14 +1988,26 @@ void CcdbApi::vectoredLoadFileToMemory(std::vector& requestConte bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& url) const { if (url.find("alien:/", 0) != std::string::npos) { - loadFileToMemory(dest, url, nullptr); // headers loaded from the file in case of the snapshot reading only - return true; + std::map localHeaders; + loadFileToMemory(dest, url, &localHeaders); + auto it = localHeaders.find("Error"); + if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { + return false; + } else { + return true; + } } if ((url.find("file:/", 0) != std::string::npos)) { std::string path = url.substr(7); if (std::filesystem::exists(path)) { - loadFileToMemory(dest, path, nullptr); - return true; + std::map localHeaders; + loadFileToMemory(dest, url, &localHeaders); + auto it = localHeaders.find("Error"); + if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { + return false; + } else { + return true; + } } } return false; From f40122dd4dab990d0f957116bb1003ec5d174b1f Mon Sep 17 00:00:00 2001 From: MichalTrzeb Date: Sun, 9 Mar 2025 15:38:24 +0100 Subject: [PATCH 2/2] Removing whitespaces --- CCDB/src/CCDBDownloader.cxx | 2 +- CCDB/src/CcdbApi.cxx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CCDB/src/CCDBDownloader.cxx b/CCDB/src/CCDBDownloader.cxx index ab5acd629ea5f..2f033a50b36e7 100644 --- a/CCDB/src/CCDBDownloader.cxx +++ b/CCDB/src/CCDBDownloader.cxx @@ -688,7 +688,7 @@ void CCDBDownloader::updateLocations(std::multimap* he } } } - + // Insert location list at the current location index. This assures that the provided locations will be tried first. locations->insert(locations->begin() + (*locIndex), newLocations.begin(), newLocations.end()); } diff --git a/CCDB/src/CcdbApi.cxx b/CCDB/src/CcdbApi.cxx index 9e856a4fee85b..2906438211c65 100644 --- a/CCDB/src/CcdbApi.cxx +++ b/CCDB/src/CcdbApi.cxx @@ -672,7 +672,7 @@ size_t header_map_callback(char* buffer, size_t size, size_t nitems, void* userd if (key == "ETag") { auto cl = headers->find("ETag"); if (cl != headers->end()) { - insert = false; + insert = false; } } @@ -680,7 +680,7 @@ size_t header_map_callback(char* buffer, size_t size, size_t nitems, void* userd if (key == "Content-Type") { auto cl = headers->find("Content-Type"); if (cl != headers->end()) { - insert = false; + insert = false; } }