[HttpClient] Never process more than 300 requests at the same time #38690
Conversation
For the record also, here is the script I use to run the requests:

```php
<?php

use Symfony\Component\HttpClient\AmpHttpClient;
use Symfony\Component\HttpClient\CurlHttpClient;
use Symfony\Component\HttpClient\NativeHttpClient;
use Symfony\Component\HttpClient\Exception\TransportException;
use Symfony\Component\Finder\Finder;
use Symfony\Component\Lock\Factory;
use Symfony\Component\Lock\Store\FlockStore;

ini_set('memory_limit', '3G');
ini_set('display_errors', true);
ini_set('error_reporting', -1);
ini_set('date.timezone', 'UTC');

require __DIR__ . '/vendor/autoload.php';

set_error_handler(function ($errno, $errstr, $errfile, $errline) {
    throw new \ErrorException($errstr, $errno, E_ERROR, $errfile, $errline);
});

class Mirror {
    private $client;
    private $url = 'https://repo.packagist.org';
    private $hostname = 'repo.packagist.org';
    public function sync()
    {
        # $this->client = new AmpHttpClient();
        $this->client = new CurlHttpClient();
        # $this->client = new NativeHttpClient();

        $rootResp = $this->download('/packages.json');
        //if ($rootResp->getHeaders()['content-encoding'][0] !== 'gzip') {
        //    throw new \Exception('Expected gzip encoded responses, something is off');
        //}
        $rootData = $rootResp->getContent();
        $rootJson = json_decode($rootData, true);
        if (null === $rootJson) {
            throw new \Exception('Invalid JSON received for file /packages.json: '.$rootData);
        }

        $requests = [];
        foreach ($rootJson['provider-includes'] as $listing => $opts) {
            $listing = str_replace('%hash%', $opts['sha256'], $listing);
            $listingResp = $this->download('/'.$listing);

            $listingData = $listingResp->getContent();
            if (hash('sha256', $listingData) !== $opts['sha256']) {
                throw new \Exception('Invalid hash received for file /'.$listing);
            }

            $listingJson = json_decode($listingData, true);
            if (null === $listingJson) {
                throw new \Exception('Invalid JSON received for file /'.$listing.': '.$listingData);
            }

            foreach ($listingJson['providers'] as $pkg => $opts) {
                $provPath = '/p/'.$pkg.'$'.$opts['sha256'].'.json';
                $provAltPath = '/p/'.$pkg.'.json';
                $provPathV2 = '/p2/'.$pkg.'.json';
                $provPathV2Dev = '/p2/'.$pkg.'~dev.json';

                $userData = [$provPath, $provAltPath, $opts['sha256']];
                $requests[] = ['GET', $this->url.$provPath, ['user_data' => $userData]];

                $headers = rand(0, 1) === 0 ? ['If-Modified-Since' => gmdate('D, d M Y H:i:s T')] : [];
                $userData = [$provPathV2, null, null];
                $requests[] = ['GET', $this->url.$provPathV2, ['user_data' => $userData, 'headers' => $headers]];

                $headers = rand(0, 1) === 0 ? ['If-Modified-Since' => gmdate('D, d M Y H:i:s T')] : [];
                $userData = [$provPathV2Dev, null, null];
                $requests[] = ['GET', $this->url.$provPathV2Dev, ['user_data' => $userData, 'headers' => $headers]];

                if (\count($requests) > 10000) { #XXX change here the max num of requests to do
                    break 2;
                }
            }

            $listingsToWrite['/'.$listing] = [$listingData, strtotime($listingResp->getHeaders()['last-modified'][0])];
        }
        $hasFailedRequests = false;
        echo count($requests), "\n";

        while ($requests && !$hasFailedRequests) {
            $responses = [];
            foreach (array_splice($requests, 0, 1000) as $i => $req) { #XXX change here the max num of requests per batch
                $responses[] = $this->client->request(...$req);
            }

            $i = 0;
            foreach ($this->client->stream($responses) as $response => $chunk) {
                try {
                    // TODO isLast here can probably be dropped after we upgrade to symfony 4.3.5 with https://github.com/symfony/symfony/pull/33643
                    if ($chunk->isFirst()/* || $chunk->isLast()*/) {
                        if ($response->getStatusCode() === 304) {
                            echo '3';
                            $response->cancel();
                            continue;
                        }
                        if ($response->getStatusCode() === 404) {
                            $userData = $response->getInfo('user_data');
                            // provider v2
                            if (null === $userData[1]) {
                                echo '4';
                                $response->cancel();
                                continue;
                            }
                        }
                    }

                    if ($chunk->isLast()) {
                        echo '.';
                        $userData = $response->getInfo('user_data');
                        if (null === $userData[1]) { // provider v2
                            $providerData = $response->getContent();
                            if (null === json_decode($providerData, true)) {
                                // abort all responses to avoid triggering any other exception, then throw
                                array_map(function ($r) { $r->cancel(); }, $responses);
                                echo 'X1';
                                dump($response);
                                dump(\strlen($providerData));
                                throw new \Exception('Invalid JSON received for file '.$userData[0]);
                            }
                        } else { // provider v1
                            $providerData = $response->getContent();
                            if (null === json_decode($providerData, true)) {
                                // abort all responses to avoid triggering any other exception, then throw
                                array_map(function ($r) { $r->cancel(); }, $responses);
                                echo 'X2';
                                throw new \Exception('Invalid JSON received for file '.$userData[0]);
                            }
                            if (hash('sha256', $providerData) !== $userData[2]) {
                                // abort all responses to avoid triggering any other exception, then throw
                                array_map(function ($r) { $r->cancel(); }, $responses);
                                echo 'X3';
                                throw new \Exception('Invalid hash received for file '.$userData[0]);
                            }
                        }
                    }
                } catch (\Throwable $e) {
                    dump($e->getMessage());
                }
            }
        }

        if ($hasFailedRequests) {
            return false;
        }

        return true;
    }
    private function download($file)
    {
        try {
            $resp = $this->client->request('GET', $this->url.$file);
            // trigger throws if needed
            $resp->getContent();
        } catch (TransportException $e) {
            // retry once
            usleep(10000);
            $resp = $this->client->request('GET', $this->url.$file);
            // trigger throws if needed
            $resp->getContent();
        }

        if ($resp->getStatusCode() >= 300) {
            throw new \RuntimeException('Failed to fetch '.$file.' => '.$resp->getStatusCode().' '.$resp->getContent());
        }

        return $resp;
    }
}

try {
    $mirror = new Mirror();
    echo ($mirror->sync() ? 'SUCCESS' : 'FAIL').PHP_EOL;
} catch (\Throwable $e) {
    echo 'Failed at '.date('Y-m-d H:i:s');
    throw $e;
}
```
I'm closing as explained. Feel free to take over in any way.
@nicolas-grekas I can't reproduce the hanging with Curl locally, maybe I need a better connection? I've increased the request numbers, but still no hanging. As I don't want to make too many requests, I have replaced the inner concurrent requests with …
@kelunik if you hit localhost you get unlimited bandwidth, which might not result in hanging in the same pattern, I guess. Feel free to hit packagist with the original script; especially during the weekend we have lots of bandwidth to spare.
@Seldaek Thanks, that's good to hear. I can reproduce the hanging with Curl. Amp runs into an out-of-memory error, but seems to run "fine" if I increase the memory limit to 5G. I've increased the request count and batch size to 100k. It's interesting that both Curl and Amp are limited by CPU time at that point: Amp spends 22.5% in the GC and around 10% in URL parsing.
@Seldaek @nicolas-grekas Thanks for the ping, there's some strange server behavior that uncovered a couple of bugs in amphp/http-client. Here's what happens: the server closes the HTTP/2 connection after 1000 requests with a GOAWAY frame. However, the server doesn't always send a GOAWAY frame. After many requests, the script gets slower. One thing I noticed is that the server then closes the connection after about 600 requests, which might be some flood detection / prevention. I've run my tests with the script ported to use amphp/http-client. Your script based on …
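As an aside, here is a minimal sketch of the retry-once pattern that this kind of connection-level close calls for. The client choice, helper name, and URL are assumptions for illustration, not code from the thread:

```php
<?php

use Symfony\Component\HttpClient\CurlHttpClient;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface;

require __DIR__.'/vendor/autoload.php';

// Hypothetical helper: retry a request once when the transport fails,
// e.g. because the server closed the HTTP/2 connection mid-stream.
function fetchWithRetry(HttpClientInterface $client, string $url): string
{
    try {
        return $client->request('GET', $url)->getContent();
    } catch (TransportExceptionInterface $e) {
        // A single retry usually lands on a fresh connection.
        return $client->request('GET', $url)->getContent();
    }
}

$client = new CurlHttpClient();
echo strlen(fetchWithRetry($client, 'https://repo.packagist.org/packages.json')), "\n";
```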
Thanks for investigating, I'm happy this helped identify some bugs.
The issue with … We could call …

What we could do is to always reset the h2 stream when any timeout is raised. How can we do that from the Symfony side?
@nicolas-grekas This is a good call. I think the inactivity timeout shouldn't apply while the client is busy, but only when the receive buffer is empty and no new data is received.
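To make that distinction concrete, here is a small sketch of an inactivity timeout that only fires when no data arrives, using Symfony HttpClient's `stream()` idle-timeout parameter; the 1s and 10s values are assumptions for illustration:

```php
<?php

use Symfony\Component\HttpClient\CurlHttpClient;

require __DIR__.'/vendor/autoload.php';

$client = new CurlHttpClient();
$response = $client->request('GET', 'https://repo.packagist.org/packages.json');

$lastActivity = microtime(true);
// stream()'s second argument yields a timeout chunk after 1s of silence
foreach ($client->stream($response, 1.0) as $chunk) {
    if ($chunk->isTimeout()) {
        // Give up only once the connection has been idle for 10s total;
        // a busy client keeps resetting the clock below.
        if (microtime(true) - $lastActivity > 10.0) {
            $response->cancel();
            throw new \RuntimeException('Inactivity timeout');
        }
        continue;
    }
    $lastActivity = microtime(true); // data arrived: reset the idle clock
    if ($chunk->isLast()) {
        break;
    }
}
```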
[HttpClient] don't fallback to HTTP/1.1 when HTTP/2 streams break (nicolas-grekas)

This PR was merged into the 4.4 branch.

Discussion
----------

[HttpClient] don't fallback to HTTP/1.1 when HTTP/2 streams break

| Q             | A
| ------------- | ---
| Branch?       | 4.4
| Bug fix?      | yes
| New feature?  | no
| Deprecations? | no
| Tickets       | Fix composer/composer#9481
| License       | MIT
| Doc PR        | -

With this change, I don't reproduce the failures that I describe in composer/composer#9481 when running the script in #38690.

Apparently curl has an issue when both h1.1 and h2 connections are open to the same host. Instead of switching to HTTP/1.1 when retrying requests that failed because of an HTTP/2 stream error, I propose to close the http/2 connection when issuing a retry.

With this change, running the mirroring script of packagist works like a charm. No need to investigate your mirrors @Seldaek, this was definitely a data corruption issue.

Commits
-------

0c92bc5 [HttpClient] don't fallback to HTTP/1.1 when HTTP/2 streams break
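For illustration, the same idea expressed with raw curl (an assumption about the mechanism, not the actual patch): stay on HTTP/2 for the retry but force it onto a brand-new connection instead of reusing the possibly broken multiplexed one.

```php
<?php

// Force a fresh HTTP/2 connection rather than downgrading to HTTP/1.1.
$ch = curl_init('https://repo.packagist.org/packages.json');
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2TLS, // stay on HTTP/2 over TLS
    CURLOPT_FRESH_CONNECT => true, // don't reuse an existing connection
    CURLOPT_FORBID_REUSE => true,  // don't keep this one in the pool either
]);
$body = curl_exec($ch);
curl_close($ch);
```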
@nicolas-grekas Thanks again for the ping. It turns out there were a couple more bugs:
These have now been fixed in the following releases:
@nicolas-grekas Another week, another update:
Perfect, thanks for the update, this triggered many improvements apparently :)
Almost one year ago, I wrote this patch to put curl handles on an internal queue when more than 300 requests are run concurrently.
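A simplified sketch of that queuing idea (the names and structure below are mine, not the actual patch): never register more than 300 handles on the curl multi handle, and promote queued handles as active transfers complete.

```php
<?php

const MAX_ACTIVE = 300; // cap on concurrently registered curl handles

$urls = array_fill(0, 1000, 'https://repo.packagist.org/packages.json');
$mh = curl_multi_init();
$queue = [];  // handles waiting for a free slot
$active = 0;  // handles currently registered on the multi handle

foreach ($urls as $url) {
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    if ($active < MAX_ACTIVE) {
        curl_multi_add_handle($mh, $ch);
        ++$active;
    } else {
        $queue[] = $ch; // over the cap: defer this transfer
    }
}

do {
    curl_multi_exec($mh, $running);
    curl_multi_select($mh, 1.0);
    while ($info = curl_multi_info_read($mh)) {
        curl_multi_remove_handle($mh, $info['handle']);
        curl_close($info['handle']);
        --$active;
        if ($next = array_shift($queue)) {
            curl_multi_add_handle($mh, $next); // promote a queued handle
            ++$active;
        }
    }
} while ($running > 0 || $queue || $active > 0);
```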
I have a reproducing script from @Seldaek that mirrors packagist instances and that makes roughly a million requests while doing so.
This script is able to chunk the requests into batches, e.g. completing 500 requests, then 500 more, and so on until the end.
Batching is required because otherwise curl (or the network) hangs when concurrency goes high (>5k).
I wrote this patch hoping to remove the need for batching requests in smaller chunks. It works, but at some point it still hangs.
I'm therefore submitting this patch for the record, as it would need more work to be made bulletproof in that situation. Since batching works well enough, this might not be needed after all.
/cc @fancyweb @jderusse FYI since you're interested in high-throughput uses of HttpClient.
Note that AmpHttpClient isn't able to handle the load here either. /cc @kelunik