在PHP中从DOM节点值创建数组



我正在尝试使用 PHP 的 DOM 扩展创建一个包含亚马逊产品变体的数组,期望得到的数组格式如下:


["Variant Name":"ASIN number"]
下面是我的代码:
// Download the product page. GET is cURL's default method, so no custom
// request verb has to be configured.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$html = curl_exec($ch);
curl_close($ch);

// curl_exec() returns false on failure; loadHTML() cannot parse that (or an
// empty body), so stop here with a clear message instead.
if (!is_string($html) || $html === '') {
    throw new RuntimeException('Failed to download the product page.');
}

// Real-world markup is rarely valid: collect libxml warnings internally while
// parsing, then restore whatever setting was active before.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);
$dom_xpath = new DOMXPath($dom);

// Create the result holders explicitly; assigning a property on an undefined
// variable is an Error in PHP 8.
if (!isset($json)) {
    $json = new stdClass();
}
if (!isset($json1)) {
    $json1 = new stdClass();
}
if (!isset($json1->SizeVariant3)) {
    $json1->SizeVariant3 = array();
}
if (!isset($json1->SizeVariant4)) {
    $json1->SizeVariant4 = array();
}

$variants = $dom_xpath->query('//*[@class="swatchAvailable" or @class="swatchSelect"]');
foreach ($variants as $data) {
    $input = $data->getAttribute("data-defaultasin");
    $inputn = $data->getAttribute("title");

    // Keep the first 10 characters as the ASIN; skip swatches whose attribute
    // is missing or too short to contain one.
    if (!preg_match('/(.{10})/', $input, $output)) {
        continue;
    }

    // The variant name is whatever follows the "Click to select " prefix.
    // If the prefix is absent, fall back to the whole title.
    $split = explode("Click to select ", $inputn, 2);
    $name = trim(isset($split[1]) ? $split[1] : $split[0]);
    if ($name === '') {
        continue;
    }

    $json1->SizeVariant3[] = $name;
    $json1->SizeVariant4[] = $output[0];
}

// Build the name => ASIN map once, after all variants have been collected.
// Note: json_encode() renders a string-keyed PHP array as a JSON object.
$json->VariantB = array_combine($json1->SizeVariant3, $json1->SizeVariant4);

虽然我的代码可以运行,但其中存在一些问题,可能无法适用于所有亚马逊产品,因此希望得到一些建议和改进。目前的输出是一个 JSON 对象:

{
"2031 Deep Blue": "B08LZH84TN",
"2031 Khaki": "B08LZHMQXS",
}

而我希望它是一个数组,就像我上面提到的。

<?php       
/**
 * Perform a GET request with cURL and return the outcome as an object.
 *
 * For https URLs, peer and host verification are enabled against the CA
 * bundle file configured below.
 *
 * @param string|null $url Address to request; surrounding whitespace is trimmed.
 *
 * @return object stdClass with three properties:
 *                - response: the value returned by curl_exec() (false on failure)
 *                - info:     the curl_getinfo() array cast to an object
 *                - errors:   the curl_error() message
 */
function curl( $url=NULL ){
    $cacert='c:/wwwroot/cacert.pem';    # download a copy from internet - https://curl.haxx.se/docs/caextract.html

    # Normalise once so the scheme check and the request use the same value,
    # and so a NULL argument is never passed to parse_url()/trim().
    $url=trim( (string)$url );

    $curl=curl_init();
    # URL schemes are case-insensitive, so compare in lower case.
    if( strtolower( (string)parse_url( $url,PHP_URL_SCHEME ) )=='https' ){
        curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, true );
        curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
        # $cacert is a bundle FILE, so only CURLOPT_CAINFO applies;
        # CURLOPT_CAPATH expects a directory of certificates.
        curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
    }
    curl_setopt( $curl, CURLOPT_URL, $url );
    curl_setopt( $curl, CURLOPT_AUTOREFERER, true );
    curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $curl, CURLOPT_FAILONERROR, true );
    curl_setopt( $curl, CURLOPT_HEADER, false );
    curl_setopt( $curl, CURLINFO_HEADER_OUT, false );
    curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
    # CURLOPT_BINARYTRANSFER is intentionally not set: it is an obsolete no-op.
    curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 20 );
    curl_setopt( $curl, CURLOPT_TIMEOUT, 60 );
    curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Curly-Wurly Ding-Dong' );
    curl_setopt( $curl, CURLOPT_MAXREDIRS, 10 );
    # An empty string asks cURL to accept every encoding it supports.
    curl_setopt( $curl, CURLOPT_ENCODING, '' );

    # Execute first, then read the transfer info and error text while the
    # handle is still open.
    $res=(object)array(
        'response'  =>  curl_exec( $curl ),
        'info'      =>  (object)curl_getinfo( $curl ),
        'errors'    =>  curl_error( $curl )
    );
    curl_close( $curl );
    return $res;
}





# Product page whose colour/size swatches are to be listed.
$url='https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1';

$res=curl( $url );

# Only parse a successful request that actually returned a body; the response
# is false when the transfer failed, and loadHTML() cannot parse an empty string.
if( $res->info->http_code==200 && is_string( $res->response ) && $res->response!=='' ){

    # Collect libxml parse warnings internally; the previous setting is
    # restored once the document has been loaded.
    $previous=libxml_use_internal_errors( true );
    $dom=new DOMDocument;

    $dom->validateOnParse=false;
    $dom->recover=true;
    $dom->strictErrorChecking=false;
    $dom->loadHTML( $res->response );
    libxml_clear_errors();
    libxml_use_internal_errors( $previous );

    $xp=new DOMXPath( $dom );
    $expr='//*[@class="swatchAvailable" or @class="swatchSelect"]';

    # Result map: variant name => ASIN.
    $tmp=array();

    $col=$xp->query( $expr );
    if( $col && $col->length > 0 ){
        foreach( $col as $node ){
            $asin=trim( $node->getAttribute('data-defaultasin') );
            $title=trim( str_replace( array( 'Click to select ', '|' ), '', $node->getAttribute('title') ) );

            # Skip swatches that lack an ASIN or a name so the map holds no
            # empty keys or values.
            if( $asin==='' || $title==='' ){
                continue;
            }
            $tmp[$title]=$asin;
        }
    }

    # The text comes from a scraped page, so escape it before embedding in HTML.
    printf('<pre>%s</pre>',htmlspecialchars( print_r($tmp,true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ));
}
?>

输出:

Array
(
[2031 Deep Blue] => B08LZH84TN
[2031 Khaki] => B08LZHMQXS
[2031 Light Grey] => B08LZFGGRL
[2031 Navy] => B08LZNGD5H
[2031 Deep Grey] => B08LZHZXDW
[2031 Wine Red] => B08LZHHGPD
[8636 All White] => B07PHQ69B7
[2031 All Black] => B08LZKXC3G
)

使用另一个 URL 进行测试时,结果如下:

Array
(
[Wine] => B07NYYZSWG
[Gold] => B07H4P7TZP
[Rose Gold] => B07H4ZMTML
[Silver Glitz] => B07P27Y9SQ
)

最新更新