curl_multi_exec takes a very long time to return responses



My project needs to hit a URL for every username stored in an array, using curl multi exec. The username array holds almost 45k entries. So far I have built a second array of the 45k URLs I want to hit, and then, to send the requests efficiently, I split that URL array into chunks of 200. I pass each chunk to my multi-curl function to collect the responses, but the problem is that receiving the responses for all 45k requests takes far too long. I have printed the response array and it keeps growing as expected, but gathering all of the responses takes too much time. Please help me, as I have to get this working by tomorrow. I would be much obliged; my code is below.

$array1=[1,2,3,4,5,6.....45000];

Now I build the URLs, passing each username as a query string:

foreach($array1 as $arr)
{
    $url[] = 'abc.com?u=' . $arr;
}

Create the chunks:

$chunk[]=array_chunk($url,200,true);

Now send each chunk:

for($i = 0; $i < sizeof($chunk); $i++)
{
    foreach($chunk[$i] as $c_arr)
    {
        array_push($res, multiRequest($c_arr));
    }
}

My multi-curl function:

function multiRequest($data, $options = array())
{
    $curly = array();
    $result = array();
    $mh = curl_multi_init();
    $ua = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13';
    foreach ($data as $id => $d)
    {
        $curly[$id] = curl_init();
        curl_setopt($curly[$id], CURLOPT_URL, $d);
        curl_setopt($curly[$id], CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curly[$id], CURLOPT_USERAGENT, $ua);
        curl_setopt($curly[$id], CURLOPT_AUTOREFERER, true);
        curl_setopt($curly[$id], CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($curly[$id], CURLOPT_MAXREDIRS, 20);
        curl_setopt($curly[$id], CURLOPT_HTTPGET, true);
        curl_setopt($curly[$id], CURLOPT_HEADER, 0);
        curl_setopt($curly[$id], CURLOPT_RETURNTRANSFER, 1);
        curl_multi_add_handle($mh, $curly[$id]);
    }
    $running = null;
    do {
        curl_multi_exec($mh, $running);
    } while ($running > 0);
    foreach ($curly as $id => $c)
    {
        $result[$id] = curl_multi_getcontent($c);
        curl_multi_remove_handle($mh, $c);
    }
    curl_multi_close($mh);
    return $result;
}
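
(Editor's note, not part of the original question: the do/while above calls curl_multi_exec in a tight loop without ever waiting for socket activity, so it keeps a CPU core spinning for the whole batch. A variant that blocks on curl_multi_select between iterations is sketched below; the option list is trimmed down and the function name is made up for illustration, it is the same curl_multi pattern minus the busy-wait.)

function multiRequestSelect(array $urls)
{
    $mh = curl_multi_init();
    $handles = array();
    foreach ($urls as $id => $url) {
        $ch = curl_init($url);
        curl_setopt_array($ch, array(
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 20
        ));
        curl_multi_add_handle($mh, $ch);
        $handles[$id] = $ch;
    }
    $running = null;
    do {
        curl_multi_exec($mh, $running);
        if ($running > 0) {
            // sleep until at least one transfer has activity, instead of spinning
            curl_multi_select($mh, 1.0);
        }
    } while ($running > 0);
    $result = array();
    foreach ($handles as $id => $ch) {
        $result[$id] = curl_multi_getcontent($ch);
        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);
    }
    curl_multi_close($mh);
    return $result;
}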

Please tell me what I should do, because delivering the responses for all 45k requests is taking roughly 30-45 minutes. Right now I am running this script on my local machine; later it will be scheduled as a cron job on a live server.

Have you tried multiprocessing instead of curl_multi? Maybe that is faster? It wouldn't be the first time.

Try

<?php
$code = <<<'CODE'
<?php
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL => 'abc.com?u=' . urlencode($argv[1]),
    CURLOPT_ENCODING => "",
    CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13',
    CURLOPT_AUTOREFERER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS => 20
));
curl_exec($ch);
curl_close($ch);
CODE;
$jobFileh = tmpfile();
$jobFile = stream_get_meta_data($jobFileh)['uri'];
file_put_contents($jobFile, $code);
$jobs = array();
for ($i = 1; $i <= 45000; ++$i) {
    $jobs[] = '/usr/bin/php ' . escapeshellarg($jobFile) . ' ' . escapeshellarg((string) $i);
}
$starttime = microtime(true);
$ret = hhb_exec_multi1($jobs, 200);
$seconds_used = microtime(true) - $starttime;
var_dump($ret, $seconds_used);
die();

class hhb_exec_multi1_ret {
    public $cmd;
    public $ret;
    public $stdout;
    public $stderr;
    function __construct(array $attributes) {
        foreach ($attributes as $name => $val) {
            $this->$name = $val;
        }
    }
}

/**
 *
 * @param string[] $cmds
 * @param int $max_concurrent
 * @throws InvalidArgumentException
 * @return hhb_exec_multi1_ret[]
 */
function hhb_exec_multi1(array $cmds, int $max_concurrent = 10, $finished_callback = NULL): array {
    // TODO: more error checking, if proc_create fail, out of ram, tmpfile() fail, etc
    {
        // input validation
        if ($max_concurrent < 1) {
            throw new InvalidArgumentException('$max_concurrent must be above 0... and less or equal to ' . PHP_INT_MAX);
        }
        foreach ($cmds as $tmp) {
            if (!is_string($tmp)) {
                throw new InvalidArgumentException('$cmds must be an array of strings!');
            }
        }
    }
    $ret = array();
    $running = array();
    foreach ($cmds as $key => $cmd) {
        $current = array(
            'cmd' => $cmd,
            'ret' => -1,
            'stdout' => tmpfile(),
            'stderr' => tmpfile(),
            'key' => $key
        );
        $pipes = [];
        $descriptorspec = array(
            0 => array(
                "pipe",
                "rb"
            ),
            1 => array(
                "file",
                stream_get_meta_data($current['stdout'])['uri'],
                "wb"
            ),
            2 => array(
                "file",
                stream_get_meta_data($current['stderr'])['uri'],
                "wb"
            ) // stderr is a file to write to
        );
        while (count($running) >= $max_concurrent) {
            // echo ".";
            usleep(100 * 1000);
            foreach ($running as $runningkey => $check) {
                $stat = proc_get_status($check['proc']);
                if ($stat['running']) {
                    continue;
                }
                proc_close($check['proc']);
                $check['ret'] = $stat['exitcode'];
                $stdout = file_get_contents(stream_get_meta_data($check['stdout'])['uri']);
                fclose($check['stdout']);
                $check['stdout'] = $stdout;
                $stderr = file_get_contents(stream_get_meta_data($check['stderr'])['uri']);
                fclose($check['stderr']);
                $check['stderr'] = $stderr;
                $checkkey = $check['key'];
                unset($check['key']);
                unset($check['proc']);
                $tmp = ($ret[$checkkey] = new hhb_exec_multi1_ret($check));
                unset($running[$runningkey]);
                if (!empty($finished_callback)) {
                    $finished_callback($tmp);
                }
            }
        }
        $current['proc'] = proc_open($cmd, $descriptorspec, $pipes);
        fclose($pipes[0]); // do it like this because we don't want the children to inherit our stdin, which is the default behaviour if [0] is not defined.
        $running[] = $current;
    }
    while (count($running) > 0) {
        // echo ",";
        usleep(100 * 1000);
        foreach ($running as $runningkey => $check) {
            $stat = proc_get_status($check['proc']);
            if ($stat['running']) {
                continue;
            }
            proc_close($check['proc']);
            $check['ret'] = $stat['exitcode'];
            $stdout = file_get_contents(stream_get_meta_data($check['stdout'])['uri']);
            fclose($check['stdout']);
            $check['stdout'] = $stdout;
            $stderr = file_get_contents(stream_get_meta_data($check['stderr'])['uri']);
            fclose($check['stderr']);
            $check['stderr'] = $stderr;
            $checkkey = $check['key'];
            unset($check['key']);
            unset($check['proc']);
            $tmp = ($ret[$checkkey] = new hhb_exec_multi1_ret($check));
            unset($running[$runningkey]);
            if (!empty($finished_callback)) {
                $finished_callback($tmp);
            }
        }
    }
    return $ret;
}

When I ran this code against a local nginx server on my laptop, with the loop set to 45000, it finished in 6 minutes 39 seconds (399 seconds).

EDIT: whoops, I had forgotten to write the code to the job file (file_put_contents); fixed.
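
If the response bodies themselves need to end up back in the parent script (the question collects them into $res), the optional $finished_callback parameter and the returned array can do that. A small usage sketch building on the same $jobFile, class, and function as above, with a hypothetical username list standing in for the real 45k array:

// hypothetical usernames; the real list would come from the question's $array1
$usernames = array('alice', 'bob', 'carol');

$jobs = array();
foreach ($usernames as $name) {
    // key the jobs by username so the result array is keyed the same way
    $jobs[$name] = '/usr/bin/php ' . escapeshellarg($jobFile) . ' ' . escapeshellarg($name);
}

// the callback fires as soon as each worker process exits
$results = hhb_exec_multi1($jobs, 200, function (hhb_exec_multi1_ret $job) {
    echo 'finished: ', $job->cmd, ' (exit code ', $job->ret, ")\n";
});

// collect the response bodies, keyed by username
$responses = array();
foreach ($results as $name => $job) {
    $responses[$name] = $job->stdout;
}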
