将 XML 文件拆分为编码为 "ISO-8859-1" 的新文件



我有一个程序,用来读取一个大的 XML 文件并对其进行验证,然后将其拆分为较小的文件。我遇到的问题是新文件的编码是 UTF-8,而我需要它们采用 ISO-8859-1 编码。

代码如下:

/**
 * Normalises the XML files in a fixed input directory, then splits the
 * {@code //DataFile/Contact} elements of {@code sample.xml} into files named
 * {@code nufile0.xml}, {@code nufile1.xml}, ... each holding at most
 * {@link #ITEMS_PER_FILE} contacts under a {@code DataFile} root.
 *
 * <p>The split files are serialised as ISO-8859-1: the transformer is told the
 * encoding and writes bytes itself, so the XML declaration and the bytes on
 * disk always agree.
 */
public class SplitMain {

    /** Directory holding the input; every file in it is normalised before the split. */
    private static final String INPUT_DIR = "D:/sanket/cms_dev/XmlSplitSample/src/inputFile/";

    /** The document whose Contact elements are split. */
    private static final String INPUT_FILE = INPUT_DIR + "sample.xml";

    /** Character encoding of the generated split files. */
    private static final String OUTPUT_ENCODING = "ISO-8859-1";

    /** Maximum number of Contact elements written to one split file. */
    private static final int ITEMS_PER_FILE = 5;

    /**
     * Matches an ampersand that does not start one of the five predefined XML
     * entity references or a numeric character reference.
     */
    private static final java.util.regex.Pattern BARE_AMPERSAND =
            java.util.regex.Pattern.compile("&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)");

    /**
     * Runs the normalise-then-split pipeline on the fixed input location.
     *
     * @param args ignored
     * @throws Exception if the input cannot be parsed or an output file cannot be written
     */
    public static void main(String[] args) throws Exception {
        validateInputFile(INPUT_DIR);
        File input = new File(INPUT_FILE);
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        Document doc = dbf.newDocumentBuilder().parse(input);
        XPath xpath = XPathFactory.newInstance().newXPath();
        NodeList nodes = (NodeList) xpath.evaluate("//DataFile/Contact", doc, XPathConstants.NODESET);

        int fileNumber = 0;
        Document currentDoc = dbf.newDocumentBuilder().newDocument();
        Node rootNode = currentDoc.createElement("DataFile");
        File currentFile = new File("nufile" + fileNumber + ".xml");
        for (int i = 1; i <= nodes.getLength(); i++) {
            Node imported = currentDoc.importNode(nodes.item(i - 1), true);
            rootNode.appendChild(imported);
            if (i % ITEMS_PER_FILE == 0) {
                // Chunk is full: flush it and start collecting the next one.
                writeToFile(rootNode, currentFile);
                rootNode = currentDoc.createElement("DataFile");
                currentFile = new File("nufile" + (++fileNumber) + ".xml");
                System.out.println(currentFile);
            }
        }
        // Flush the partial last chunk. When the contact count is an exact multiple of
        // ITEMS_PER_FILE the pending root is empty and is skipped, so no spurious empty
        // file is produced; an input without contacts still yields one empty nufile0.xml.
        if (rootNode.hasChildNodes() || fileNumber == 0) {
            writeToFile(rootNode, currentFile);
        }
    }

    /**
     * Serialises {@code node} to {@code file} in {@link #OUTPUT_ENCODING}.
     *
     * <p>The result is a byte stream rather than a {@code Writer}: with a writer the
     * transformer only produces characters and the writer re-encodes them in the
     * platform charset, which can contradict the encoding named in the XML declaration.
     *
     * @param node the node to serialise
     * @param file the destination, overwritten if it exists
     * @throws Exception if the transformation or the file I/O fails
     */
    private static void writeToFile(Node node, File file) throws Exception {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(javax.xml.transform.OutputKeys.ENCODING, OUTPUT_ENCODING);
        try (java.io.OutputStream out = new java.io.FileOutputStream(file)) {
            transformer.transform(new DOMSource(node), new StreamResult(out));
        }
    }

    /**
     * Rewrites every file in {@code WORK_DIRECTORY} in place: its lines are trimmed and
     * concatenated, bare ampersands are escaped, and the text is parsed and serialised
     * back to the same path. A file that cannot be read or parsed is reported on stderr
     * and left untouched; processing continues with the next file.
     *
     * <p>If the path denotes a regular file, the method prints a marker line and returns.
     *
     * @param WORK_DIRECTORY path of the directory to process
     * @throws IllegalArgumentException if the directory cannot be listed
     */
    private static void validateInputFile(String WORK_DIRECTORY) {
        File folder = new File(WORK_DIRECTORY);
        if (folder.isFile()) {
            System.out.println("watever");
            return;
        }
        String[] fileNameArray = folder.list();
        if (fileNameArray == null) {
            // File.list() returns null when the path is not a directory or an I/O error occurs.
            throw new IllegalArgumentException("Cannot list input directory: " + WORK_DIRECTORY);
        }
        for (String fileName : fileNameArray) {
            File file = new File(WORK_DIRECTORY + "/" + fileName);
            try {
                // XML to string
                String finalString = readTrimmedLines(file);
                System.out.println(finalString);
                finalString = escapeBareAmpersands(finalString);
                System.out.println(finalString);
                // String to XML
                DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
                Document document = builder.parse(new InputSource(new StringReader(finalString)));
                Transformer aTransformer = TransformerFactory.newInstance().newTransformer();
                aTransformer.transform(new DOMSource(document), new StreamResult(file));
            } catch (Exception e) {
                // Best effort: one unreadable or malformed file must not stop the others.
                System.err.println("Could not normalise " + file);
                e.printStackTrace();
            }
        }
    }

    /**
     * Reads {@code file} line by line and returns the trimmed lines joined without a separator.
     *
     * <p>NOTE(review): the file is decoded with the platform default charset, so the
     * encoding named in its XML declaration is not honoured here - confirm the inputs
     * match the platform charset.
     */
    private static String readTrimmedLines(File file) throws java.io.IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line.trim());
            }
        }
        return text.toString();
    }

    /**
     * Replaces each ampersand with {@code &amp;} unless it already begins a predefined
     * entity reference or a numeric character reference, so text that is already
     * escaped is not escaped a second time.
     */
    private static String escapeBareAmpersands(String text) {
        return BARE_AMPERSAND.matcher(text).replaceAll("&amp;");
    }

}

您需要指定转换器编码,例如:

transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");

然后使用流(OutputStream)写出(如果使用 Writer,输出会再经过一层不必要的编码转换)。

最新更新