如何在 JavaFX 中遍历 DOM 并找到 IMG 标签及其 SRC 属性


/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package imagesget;
import java.io.StringWriter;
import java.util.logging.Level;
import java.util.logging.Logger;
import javafx.application.Application;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.concurrent.Worker;
import javafx.scene.layout.HBox;
import javafx.scene.layout.StackPane;
import javafx.scene.web.WebEngine;
import javafx.scene.web.WebView;
import javafx.stage.Stage;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;


/**
 * Loads a fixed web page through a JavaFX {@link WebEngine} and, once the
 * load worker reports {@link Worker.State#SUCCEEDED}, prints the page's DOM
 * serialized as indented XML followed by the result of looking up its
 * {@code img} elements.
 *
 * @author biznis
 */
public class ImagesGet extends Application {

    private static final Logger LOGGER = Logger.getLogger(ImagesGet.class.getName());

    /** Address of the page whose DOM is dumped. */
    private static final String PAGE_URL = "http://epaper.timesgroup.com/Olive/ODN/TheEconomicTimes/#";

    /**
     * Starts loading {@link #PAGE_URL} and registers a listener that dumps
     * the document when loading succeeds.
     *
     * @param primaryStage the primary stage supplied by JavaFX; it is not
     *                     used, and the {@link WebView} is never attached to
     *                     a scene because only its engine is needed here
     * @throws Exception declared by {@link Application#start(Stage)}
     */
    @Override
    public void start(Stage primaryStage) throws Exception {
        final WebView browser = new WebView();
        final WebEngine wb = browser.getEngine();
        wb.load(PAGE_URL);
        wb.getLoadWorker().stateProperty().addListener(
            new ChangeListener<Worker.State>() {
                @Override
                public void changed(ObservableValue<? extends Worker.State> ov,
                        Worker.State oldState, Worker.State newState) {
                    if (newState == Worker.State.SUCCEEDED) {
                        Document doc = wb.getDocument();
                        printDocument(doc);
                        printImages(doc);
                    }
                }
            });
    }

    /**
     * Serializes {@code doc} to indented UTF-8 XML and prints it to standard
     * output. If the transformer cannot be created or the transformation
     * fails, the failure is logged and nothing is printed.
     *
     * @param doc the DOM document to serialize
     */
    private static void printDocument(Document doc) {
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            // Implementation-specific key; presumably sets the indent width - verify
            // against the transformer implementation in use.
            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

            StringWriter stringWriter = new StringWriter();
            transformer.transform(new DOMSource(doc), new StreamResult(stringWriter));
            System.out.println(stringWriter.toString());
        } catch (TransformerException ex) {
            // TransformerConfigurationException is a subclass, so this one handler
            // covers both creating the transformer and running it.
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Looks up every {@code img} element of {@code doc} and prints the
     * resulting {@link NodeList}.
     *
     * @param doc the DOM document to search
     */
    private static void printImages(Document doc) {
        NodeList anchors = doc.getElementsByTagName("img");
        // Printing the list itself only shows its toString() value (observed output:
        // implementation class name followed by '@' and a hex value). To reach the
        // individual elements, walk the list with getLength() and item(int).
        System.out.println(anchors);
    }

    /**
     * Launches the JavaFX application.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        Application.launch(args);
    }
}

这是我尝试的代码,但是通过下面这两行:

// Prints the NodeList object itself (its toString() value), not the <img> elements it contains.
NodeList anchors = doc.getElementsByTagName("img"); 
System.out.println(anchors);

我得到的输出是:

com.sun.webkit.dom.NodeListImpl@614d6ab6

如何才能获取 IMG 标签?有谁能告诉我如何从外部 HTML 页面中找到 IMG 标签,并把所有图片下载到本地?请推荐相关的文档或方法,以便我实现这个功能。

您是在把 org.w3c.dom.NodeList 类型的对象直接打印到 System.out。因此被调用的是 NodeList.toString(),打印出来的只是该对象的默认字符串表示(类名加哈希码)。

我想您真正需要的是处理列表中的各个节点。下面的代码可以让您了解如何处理列表中的节点:

// Visit each <img> element by index, using the NodeList accessors getLength() and item(int).
NodeList anchors = doc.getElementsByTagName("img");
for (int index = 0; index < anchors.getLength(); index++) {
    Node node = anchors.item(index);
    System.out.println(node.getNodeName());
    // The image location is held in the element's "src" attribute;
    // getNamedItem returns null when the attribute is absent.
    Node src = node.getAttributes().getNamedItem("src");
    if (src != null) {
        System.out.println(src.getNodeValue());
    }
}

请参阅完整的 Javadoc,以了解 org.w3c.dom.Node 还提供了哪些功能。

最新更新