使用 Dart/Flutter 从 Google 抓取图像时得到空列表 []



你好,我想从 Google 抓取图像,但得到的结果列表是空的([]),我一直找不出原因,请参阅下面的代码。

主要方法

/// Entry point: prints the greeting, runs the scrape for 'hyderabad', and
/// then prints the top-level `imageList`.
void main(List<String> arguments) async {
  final greeting = 'Hello world: ${web_scrapping.calculate()}!';
  print(greeting);

  // The scrape stores its result in `imageList` rather than returning it.
  await getImageByDom('hyderabad');
  print(imageList);
}

**获取图像的方法**


/// Image sources collected by the most recent [getImageByDom] call.
List imageList;

/// Downloads the Google Images result page for [city] and stores, in
/// [imageList], the `src` attribute of the first `<img>` found inside every
/// element with class `bRMDJf`.
void getImageByDom(String city) async {
  final searchUrl =
      'https://www.google.com.pk/search?q=$city+city&tbm=isch&ved=2ahUKEwiK5_jmqt_uAhWE4oUKHSH-DRYQ2-cCegQIABAA';
  final response = await http.get(searchUrl);
  final document = parser.parse(response.body);
  final containers = await document.getElementsByClassName('bRMDJf');

  // One entry per matched container: the `src` of its first <img>.
  final sources = containers.map((container) {
    final firstImage = container.getElementsByTagName('img')[0];
    return firstImage.attributes['src'];
  });
  imageList = sources.toList();
}

getImageByDom 应该返回 Future<List<String>>,而不是 void。

这里有一个完整的例子

import 'dart:async';
import 'dart:io';
import 'package:http/http.dart' as http;
import 'package:html/parser.dart' show parse;
import 'package:html/dom.dart';
/// Runs the example: collects the image sources for 'hyderabad' and prints
/// the resulting list.
Future<void> main() async {
  final imageList = await getImageByDom('hyderabad');
  print(imageList);
}
/// Fetches the Google Images result page for [city] and returns the `src`
/// attribute of the `<img>` inside every `.bRMDJf` element.
///
/// Returns an empty list when the response status is not 200. An entry is
/// null when the image has no `src` attribute.
Future<List<String>> getImageByDom(String city) async {
  // The request is sent with a desktop browser User-Agent.
  final headers = <String, String>{
    HttpHeaders.userAgentHeader:
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
  };

  // Building the URI from its parts percent-encodes [city] (spaces become
  // '+'), and package:http 0.13+ only accepts a Uri, not a String.
  final uri = Uri.https('www.google.com.pk', '/search', {
    'q': '$city city',
    'tbm': 'isch',
    'ved': '2ahUKEwiK5_jmqt_uAhWE4oUKHSH-DRYQ2-cCegQIABAA',
  });

  final response = await http.get(uri, headers: headers);
  if (response.statusCode != HttpStatus.ok) {
    return [];
  }

  final document = parse(response.body);
  // querySelectorAll is synchronous, so there is nothing to await.
  final elements = document.querySelectorAll('.bRMDJf');
  // NOTE(review): assumes every matched element contains an <img>; confirm,
  // since a missing one would fail on the attribute lookup.
  return elements.map((a) => a.querySelector('img').attributes['src']).toList();
}

[data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, 
null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null]

但它提取到的只是 1x1 的占位符图像,因为:

  • Google 通过 _defd 这个 JS 函数注入部分 rg_meta 标签。您需要提取并解码它的参数。
  • Google Images 也包含一些直接以 HTML 标签形式给出的图像,而不是放在 .rg_meta 中。你可以把它们当作普通的 HTML 标签来解析。

下面是 Google Images 中用于注入标签的部分 JS 代码。第一个参数 defd1 是占位 HTML 元素的 ID,该元素会被替换为实际内容;第二个参数是编码后的 HTML 字符串。

_defd('defd1','\x3cdiv  jsaction\x3d\x22IE7JUb:e5gl8b;MW7oTe:fL5Ibf;dtRDof:s370ud;R3mad:ZCNXMe;v03O1c:cJhY7b;\x22 data-ved\x3d\x222ahUKEwiGmK7a3szoAhVDB3cKHesrAQoQMygVegQIARBY\x22  data-ictx\x3d\x221\x22 data-id\x3d\x229pyRez7A01yLeM\x22 jsname\x3d\x22N9Xkfe\x22 data-ri\x3d\x2221\x22 class\x3d\x22isv-r PNCib MSM1fd BUooTd\x22 jscontroller\x3d\x22SI4J6c\x22 jsmodel\x3d\x22uZbpBf sB4qxc\x22 jsdata\x3d\x22j0Opre;9pyRez7A01yLeM;27\x22 style\x3d\x22width:283px;\x22 data-tbnid\x3d\x229pyRez7A01yLeM\x22  data-ct\x3d\x220\x22 data-cb\x3d\x220\x22 data-cl\x3d\x220\x22 data-cr\x3d\x223\x22 data-tw\x3d\x22299\x22 data-ow\x3d\x221100\x22 data-oh\x3d\x22619\x22\x3e\x3ca class\x3d\x22wXeWr islib nfEiy mM5pbd\x22 jsname\x3d\x22sTFXNd\x22 jsaction\x3d\x22click:J9iaEb;\x22 jsaction\x3d\x22mousedown:npT2md; touchstart:npT2md;\x22 data-nav\x3d\x221\x22 tabindex\x3d\x220\x22 style\x3d\x22height:164px;\x22\x3e\x3cdiv class\x3d\x22bRMDJf islir\x22 jsname\x3d\x22DeysSe\x22 style\x3d\x22background:rgb(200,194,187);width:292px; height:164px;margin-left:0px; margin-right:-9px;\x22 jsaction\x3d\x22mousedown:npT2md; touchstart:npT2md;\x22\x3e\x3cimg class\x3d\x22rg_i Q4LuWd tx8vtf\x22 data-src\x3d\x22https://encrypted-tbn0.gstatic.com/images?q\x3dtbn%3AANd9GcQ3_h7vxb_72egN52iUj6J4TQs7XajUVhI6BxP_tMepqnFdcGL1\x26amp;usqp\x3dCAU\x22 data-lt\x3d\x22\x22  jsname\x3d\x22Q4LuWd\x22  alt\x3d\x22Jamaican Coffee Companies Devise Plan to Court US Market - OECS ...\x22 /\x3e\x3c/div\x3e\x3cdiv class\x3d\x22c7cjWc\x22\x3e\x3c/div\x3e\x3cdiv class\x3d\x22PiLIec\x22 jsaction\x3d\x22click: gFs2Re\x22\x3e\x3c/div\x3e\x3c/a\x3e\x3ca class\x3d\x22VFACy kGQAp\x22 data-ved\x3d\x222ahUKEwiGmK7a3szoAhVDB3cKHesrAQoQr4kDegQIARBZ\x22 jsname\x3d\x22uy6ald\x22 rel\x3d\x22noopener\x22 target\x3d\x22_blank\x22 href\x3d\x22http://oecsbusinessfocus.com/jamaican-coffee-companies-devise-plan-to-court-us-market/\x22 jsaction\x3d\x22focus:kvVbVb; mousedown:kvVbVb; touchstart:kvVbVb;\x22\x3e\x3cdiv class\x3d\x22sMi44c lNHeqe\x22\x3e\x3cdiv class\x3d\x22WGvvNb\x22 \x3eJamaican Coffee Companies Devise Plan ...\x3c/div\x3e\x3cdiv 
class\x3d\x22fxgdke\x22\x3eoecsbusinessfocus.com\x3c/div\x3e\x3c/div\x3e\x3c/a\x3e\x3c/div\x3e')

或者,您可以通过 SerpApi 的 Google Images API 获取图像。这是一个提供免费试用的付费 API。

完整的示例

import 'dart:async';
import 'dart:convert';
import 'dart:io' show Platform, HttpStatus;
import 'package:http/http.dart' as http;
/// Queries SerpApi's Google Images search for "Coffee" and prints the title
/// and thumbnail URL of every image result.
///
/// The API key is read from the `API_KEY` environment variable. When the key
/// is missing, the request does not return 200, or the response carries no
/// `images_results` list, a short message is printed and the function returns
/// without printing any results.
Future<void> main() async {
  final apiKey = Platform.environment['API_KEY'];
  if (apiKey == null || apiKey.isEmpty) {
    // Without a key the request cannot succeed, so stop before sending it.
    print('API_KEY environment variable is not set.');
    return;
  }

  final uri = Uri.https('serpapi.com', '/search', {
    'q': 'Coffee',
    'google_domain': 'google.com.pk',
    'gl': 'pk',
    'tbm': 'isch',
    'api_key': apiKey,
  });
  final response = await http.get(uri);
  if (response.statusCode != HttpStatus.ok) {
    print('Request failed with status ${response.statusCode}.');
    return;
  }

  final convertedResponse = jsonDecode(response.body);
  // Iterating a missing (null) `images_results` would throw, so verify the
  // decoded shape before looping.
  final imageResults =
      convertedResponse is Map ? convertedResponse['images_results'] : null;
  if (imageResults is! List) {
    print('Response contains no images_results list.');
    return;
  }

  for (final imageResult in imageResults) {
    print("""
Title: ${imageResult['title']}
Thumbnail: ${imageResult['thumbnail']}
""");
  }
}

Title: New Study Says 25 Cups Is Fine ...fortune.com
Thumbnail: https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSEQHSv8xWAiRTaXUcY0VbPy2BecsEZ2KpsPQ&usqp=CAU
Title: Positive and Negative Effects of Caffeineaarp.org
Thumbnail: https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQzN08BEkD0a2Gl7D1vyFUo-jHmhIvlH9FUXQ&usqp=CAU
Title: How is Coffee Decaffeinated?cafedirect.co.uk
Thumbnail: https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT6Tcm59kufV9EkMEOkf9JErWrj0tSMwOCAIA&usqp=CAU
Title: Colombian Coffee Buzz: What's So Great ...ictcoffee.com
Thumbnail: https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQGOKd6N93WdEzpuxpopFJlCXZFqK7YBHofiA&usqp=CAU
Stripped...

声明:我在 SerpApi 工作。

最新更新