0

When I use the ROME API (`com.rometools:rome:1.15.0`) to parse an XML document:

package com.dolphin.soa.post;

import com.rometools.rome.feed.synd.SyndFeed;
import com.rometools.rome.io.SyndFeedInput;
import com.rometools.rome.io.XmlReader;
import lombok.extern.slf4j.Slf4j;

import java.net.URL;

/**
 * @author dolphin
 */
@Slf4j
public class MiniExample {

    public static void main(String[] args) {
        try {
            URL url = new URL("https://blog.scottlowe.org/feed.xml");
            SyndFeed feed = new SyndFeedInput().build(new XmlReader(url));
            System.out.print(feed.getCopyright());
        }catch (Exception e){
            log.error("error",e);
        }
    }
}

it gives me this error:

Invalid XML: Error on line 1: Content is not allowed in prolog.

Is it possible to fix this problem? I tried decompressing the returned stream before parsing it, to fix this problem:

/**
 * Wraps {@code input} in a {@link GZIPInputStream} when it starts with the gzip magic
 * number ({@code 0x1f 0x8b}); otherwise returns a stream yielding the same bytes.
 *
 * @param input the stream to inspect; its first two bytes are read and pushed back
 * @return a decompressing stream if the data is gzip-compressed, else a pass-through stream
 * @throws IOException if reading the signature or the gzip header fails
 */
public static InputStream decompressStream(InputStream input) throws IOException {
    // A two-byte pushback buffer lets us look ahead and then rewind.
    PushbackInputStream pb = new PushbackInputStream(input, 2);
    byte[] signature = new byte[2];
    int len = 0;
    // A single read() may return fewer bytes than requested, so keep reading
    // until both signature bytes have arrived or the stream ends.
    while (len < signature.length) {
        int n = pb.read(signature, len, signature.length - len);
        if (n < 0) {
            break;
        }
        len += n;
    }
    // Only push back what was actually read; an empty stream has nothing to unread.
    if (len > 0) {
        pb.unread(signature, 0, len);
    }
    boolean gzipped = len == signature.length
            && signature[0] == (byte) 0x1f
            && signature[1] == (byte) 0x8b; // standard gzip magic number
    if (gzipped) {
        return new GZIPInputStream(pb);
    }
    return pb;
}
Dolphin
  • 13,739
  • 26
  • 110
  • 272
  • 1
    Your content seems gzip compressed. From the error, I think `XmlReader` or URL does not take that into account. – NoDataFound Aug 06 '21 at 03:13

0 Answers0