Skip to main content
 首页 » 编程设计

java之将 XML 文件拆分为多个文件,每个文件有 500 个标签

2024年04月12日28wayfarer

我有一个大文件 (1 GB),需要拆分成较小的文件。我希望每个较小的文件包含 500 个 <OFFER> 标签(元素)。

这是大型 XML 文件的一小段:

<?xml version="1.0"?><RESULT> 
<header> 
    <site>http://www.thomascook.fr</site> 
    <marque>ThomasCook France</marque> 
    <logo>http://www.example.com/example.gif</logo> 
</header> 
<OFFER> 
    <IFF>5810</IFF> 
    <TO>TCF</TO> 
    <COUNTRY>Chypre</COUNTRY> 
    <REGION>Chypre du Sud</REGION> 
    <HOTELNAME>Elias Beach &amp; Country Club</HOTELNAME> 
    <DESCRIPTION>....</DESCRIPTION> 
    <TYPE>Sejour</TYPE> 
    <STARS>5.0</STARS> 
    <THEMAS>Plage directe;Special enfant;Bien-Etre-Fitness</THEMAS> 
    <THUMBNAIL>http://example.com/example.jpg</THUMBNAIL> 
    <URL>http://example.com/example.html</URL> 
    <DATE> 
        <BROCHURE>TCFB</BROCHURE> 
        <DURATION>7</DURATION> 
        <DURATION_VAR>6_6-9</DURATION_VAR> 
        <BOARD>Demi-pension</BOARD> 
        <DEPARTURE>27.2.2011</DEPARTURE> 
        <RETURN>6.3.2011</RETURN> 
        <DEPARTURE_CITY>PAR</DEPARTURE_CITY> 
        <ARRIVAL_CITY>LCA</ARRIVAL_CITY> 
        <PRICE>790</PRICE> 
        <URL>http://example.com/other-example.html</URL> 
    </DATE> 
</OFFER> 
<OFFER> 
  (etc) 
</OFFER> 

我该怎么做?

请您参考如下方法:

根据你的描述,我理解你是想把一个大的 XML 文件拆分成多个小文件。推荐使用 VTD-XML:http://vtd-xml.sourceforge.net/

示例代码如下:它按 XPath /TopTag/ChildTag 选中元素并拆分大型 XML 文件。对于上面的文件,应把 XPath 改为 /RESULT/OFFER,并相应修改 prefix 和 suffix 中的根标签。

 
import java.io.File; 
import java.io.FileOutputStream; 
 
import com.ximpleware.AutoPilot; 
import com.ximpleware.FastLongBuffer; 
import com.ximpleware.VTDGen; 
import com.ximpleware.VTDNav; 
 
// This example shows how to split XML 
/**
 * Splits a large XML document into several smaller documents using VTD-XML.
 *
 * <p>Every element matched by the XPath {@code /TopTag/ChildTag} is copied
 * byte-for-byte from the parsed input into an output file. Each output file
 * holds at most {@code elementsPerFile} matched elements, wrapped in a fresh
 * {@code <TopTag>} root so that it is a well-formed document of its own.
 *
 * <p>Usage: {@code java Split <input.xml> <elementsPerFile> <outputFilePrefix>}
 */
public class Split {
    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the path of the XML file to split,
     *             {@code args[1]} is the maximum number of matched elements
     *             per output file (must be a positive integer), and
     *             {@code args[2]} is the prefix of the output file names; each
     *             file is named {@code prefix + indexOfItsFirstElement + ".xml"}
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Usage: java Split <input.xml> <elementsPerFile> <outputFilePrefix>");
            return;
        }

        String prefix = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<TopTag xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n";
        // Closing tag of the wrapper root; it must be well-formed or every
        // generated file is unparseable.
        String suffix = "\n</TopTag>";
        try {
            int splitBy = Integer.parseInt(args[1]);
            if (splitBy <= 0) {
                throw new IllegalArgumentException(
                        "elementsPerFile must be a positive integer, got " + splitBy);
            }
            String filePrefix = args[2];

            // Encode the wrapper explicitly as UTF-8 so the bytes agree with the
            // encoding announced in the XML declaration, whatever the platform
            // default charset is.
            // NOTE(review): the copied fragments keep the input file's own
            // encoding -- confirm the input is UTF-8 (or ASCII) as well.
            byte[] prefixBytes = prefix.getBytes("UTF-8");
            byte[] suffixBytes = suffix.getBytes("UTF-8");

            VTDGen vg = new VTDGen();
            if (!vg.parseFile(args[0], false)) {
                System.err.println("Could not parse XML file: " + args[0]);
                return;
            }

            VTDNav vn = vg.getNav();
            AutoPilot ap = new AutoPilot(vn);
            ap.selectXPath("/TopTag/ChildTag");

            // flb collects one long per matched element; the lower 32 bits are
            // used below as the fragment's byte offset and the upper 32 bits as
            // its byte length.
            FastLongBuffer flb = new FastLongBuffer(4);
            byte[] xml = vn.getXML().getBytes();
            while (ap.evalXPath() != -1) {
                flb.append(vn.getElementFragment());
            }

            int size = flb.size();
            FileOutputStream fos = null;
            try {
                for (int k = 0; k < size; k++) {
                    if (k % splitBy == 0) {
                        // Start of a new chunk: finish the previous file, if
                        // any, then open the next one.
                        if (fos != null) {
                            fos.write(suffixBytes);
                            fos.close();
                        }
                        fos = new FileOutputStream(new File(filePrefix + k + ".xml"));
                        fos.write(prefixBytes);
                    }
                    fos.write(xml, flb.lower32At(k), flb.upper32At(k));
                }
                if (fos != null) {
                    // Terminate the last (possibly partially filled) file.
                    fos.write(suffixBytes);
                }
            } finally {
                // Release the file handle even when a write fails midway.
                if (fos != null) {
                    fos.close();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}