/*
 * Decompiled with CFR 0.152.
 */
package org.archive.extract;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.logging.Logger;
import org.archive.extract.ExtractorOutput;
import org.archive.format.gzip.GZIPMemberWriter;
import org.archive.format.gzip.GZIPMemberWriterCommittedOutputStream;
import org.archive.format.http.HttpHeaders;
import org.archive.format.json.JSONUtils;
import org.archive.format.warc.WARCRecordWriter;
import org.archive.resource.MetaData;
import org.archive.resource.Resource;
import org.archive.util.DateUtils;
import org.archive.util.IAUtils;
import org.archive.util.StreamCopy;
import org.archive.util.io.CommitedOutputStream;
import org.json.JSONException;

/**
 * {@link ExtractorOutput} that writes one WARC JSON-metadata record per
 * extracted {@link Resource} to a {@link GZIPMemberWriter}, preceded by a
 * single warcinfo record emitted before the first resource.
 *
 * <p>Each record is written through its own committed output stream (see
 * {@link #getOutput()}) and committed once the record has been written.
 *
 * <p>Instances keep mutable state ({@code wroteFirst}) without any
 * synchronization.
 */
public class WATExtractorOutput implements ExtractorOutput {
    /** Default in-memory buffer size handed to each committed stream (1 MiB). */
    private static final int DEFAULT_BUFFER_RAM = 0x100000;
    private static final Charset UTF8 = Charset.forName("UTF-8");
    private static final Logger LOG = Logger.getLogger(WATExtractorOutput.class.getName());

    /** Record writer; left package-private and non-final as in the original class. */
    WARCRecordWriter recW;
    /** True once the leading warcinfo record has been written. */
    private boolean wroteFirst;
    private final GZIPMemberWriter gzW;
    private final int bufferRAM = DEFAULT_BUFFER_RAM;
    /** Caller-supplied output name; may be null/empty, see {@link #writeWARCInfo}. */
    private final String outputFile;

    /**
     * @param out        stream the gzip member writer is built on
     * @param outputFile name to report in the warcinfo record; when null or
     *                   empty the name is derived from the first resource's
     *                   {@code Container.Filename}
     */
    public WATExtractorOutput(OutputStream out, String outputFile) {
        this.gzW = new GZIPMemberWriter(out);
        this.recW = new WARCRecordWriter();
        this.wroteFirst = false;
        this.outputFile = outputFile;
    }

    /** Creates a fresh committed stream over the shared gzip writer. */
    private CommitedOutputStream getOutput() {
        return new GZIPMemberWriterCommittedOutputStream(this.gzW, this.bufferRAM);
    }

    /**
     * Writes the metadata record for {@code resource}, emitting the warcinfo
     * record first if this is the first call.
     *
     * @param resource resource whose top-level metadata is written
     * @throws IOException if {@code Envelope.Format} is missing or is neither
     *                     ARC nor WARC, if a required metadata field is
     *                     absent, or if writing fails
     */
    @Override
    public void output(Resource resource) throws IOException {
        // Drain the resource before reading its metadata.
        // NOTE(review): presumably the metadata is only complete after EOF — confirm.
        StreamCopy.readToEOF(resource.getInputStream());
        MetaData top = resource.getMetaData().getTopMetaData();

        if (!this.wroteFirst) {
            CommitedOutputStream infoOut = this.getOutput();
            this.writeWARCInfo(infoOut, top);
            infoOut.commit();
            this.wroteFirst = true;
        }

        String envelopeFormat = JSONUtils.extractSingle(top, "Envelope.Format");
        if (envelopeFormat == null) {
            throw new IOException("Missing Envelope.Format");
        }

        CommitedOutputStream recordOut = this.getOutput();
        if (envelopeFormat.startsWith("ARC")) {
            this.writeARC(recordOut, top);
        } else if (envelopeFormat.startsWith("WARC")) {
            this.writeWARC(recordOut, top);
        } else {
            throw new IOException("Unknown Envelope.Format");
        }
        recordOut.commit();
    }

    /**
     * Writes the warcinfo record describing this output.
     *
     * <p>The reported filename is the base name of {@code outputFile} when one
     * was supplied; otherwise it is the base name of the resource's
     * {@code Container.Filename} with a trailing {@code .warc}, {@code .warc.gz},
     * {@code .arc} or {@code .arc.gz} rewritten to the matching
     * {@code .wat.gz} name.
     *
     * @throws IOException if no filename can be determined or writing fails
     */
    private void writeWARCInfo(OutputStream recOut, MetaData md) throws IOException {
        String filename = this.outputFile;
        if (filename == null || filename.length() == 0) {
            filename = JSONUtils.extractSingle(md, "Container.Filename");
            if (filename == null) {
                throw new IOException("No Container.Filename...");
            }
            if (filename.endsWith(".warc") || filename.endsWith(".warc.gz")) {
                filename = filename.replaceFirst("\\.warc$", ".warc.wat.gz");
                filename = filename.replaceFirst("\\.warc\\.gz$", ".warc.wat.gz");
            } else if (filename.endsWith(".arc") || filename.endsWith(".arc.gz")) {
                filename = filename.replaceFirst("\\.arc$", ".arc.wat.gz");
                filename = filename.replaceFirst("\\.arc\\.gz$", ".arc.wat.gz");
            }
        }
        // Only the last path component is reported.
        filename = new File(filename).getName();

        HttpHeaders headers = new HttpHeaders();
        headers.add("software", IAUtils.COMMONS_VERSION);
        headers.addDateHeader("extractedDate", new Date());
        try {
            InetAddress host = InetAddress.getLocalHost();
            headers.add("ip", host.getHostAddress());
            headers.add("hostname", host.getCanonicalHostName());
        }
        catch (UnknownHostException e) {
            // Best effort: the record is still written without ip/hostname.
            LOG.warning("unable to obtain local crawl engine host :\n" + e.getMessage());
        }
        headers.add("format", IAUtils.WARC_FORMAT);
        headers.add("conformsTo", IAUtils.WARC_FORMAT_CONFORMS_TO);
        if (IAUtils.OPERATOR != null && IAUtils.OPERATOR.length() > 0) {
            headers.add("operator", IAUtils.OPERATOR);
        }
        if (IAUtils.PUBLISHER != null && IAUtils.PUBLISHER.length() > 0) {
            headers.add("publisher", IAUtils.PUBLISHER);
        }
        if (IAUtils.WAT_WARCINFO_DESCRIPTION != null && IAUtils.WAT_WARCINFO_DESCRIPTION.length() > 0) {
            headers.add("description", IAUtils.WAT_WARCINFO_DESCRIPTION);
        }

        ByteArrayOutputStream payload = new ByteArrayOutputStream();
        headers.write(payload);
        this.recW.writeWARCInfoRecord(recOut, filename, payload.toByteArray());
    }

    /**
     * Looks up {@code path} in {@code md}.
     *
     * @return the extracted value, never null
     * @throws IOException if the lookup yields null
     */
    private String extractOrIO(MetaData md, String path) throws IOException {
        String value = JSONUtils.extractSingle(md, path);
        if (value == null) {
            throw new IOException("No " + path + " found.");
        }
        return value;
    }

    /**
     * Writes the metadata record for a resource whose envelope is an ARC
     * record. The record id is synthesized as
     * {@code <urn:arc:FILENAME:OFFSET>} and the record date is taken from the
     * ARC header's {@code Date} field.
     */
    private void writeARC(OutputStream recOut, MetaData md) throws IOException {
        String targetURI = this.extractOrIO(md, "Envelope.ARC-Header-Metadata.Target-URI");
        String capDateString = this.extractOrIO(md, "Envelope.ARC-Header-Metadata.Date");
        String filename = this.extractOrIO(md, "Container.Filename");
        String offset = this.extractOrIO(md, "Container.Offset");
        String recId = String.format("<urn:arc:%s:%s>", filename, offset);
        this.writeWARCMDRecord(recOut, md, targetURI, capDateString, recId);
    }

    /**
     * Writes the metadata record for a resource whose envelope is a WARC
     * record. The record date is the current time, truncated to whole seconds.
     *
     * <p>For {@code warcinfo} envelopes the target URI is the optional
     * {@code WARC-Filename} (may be null); for every other type
     * {@code WARC-Target-URI} is required.
     *
     * <p>NOTE(review): the envelope's own WARC-Date is not consulted here even
     * though {@link #transformWARCDate(String)} exists — confirm that stamping
     * the extraction time is intended.
     */
    private void writeWARC(OutputStream recOut, MetaData md) throws IOException {
        String warcType = this.extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Type");
        String targetURI;
        if (warcType.equals("warcinfo")) {
            targetURI = JSONUtils.extractSingle(md, "Envelope.WARC-Header-Metadata.WARC-Filename");
        } else {
            targetURI = this.extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI");
        }
        // Pass the instant directly instead of formatting it in the JVM's
        // default time zone and re-parsing the string: that round trip shifts
        // the date whenever formatter and parser disagree on the zone. The
        // truncation keeps the second resolution of the former 14-digit string.
        Date capDate = new Date(System.currentTimeMillis() / 1000L * 1000L);
        String recId = this.extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Record-ID");
        this.writeWARCMDRecord(recOut, md, targetURI, capDate, recId);
    }

    /**
     * Parses {@code capDateString} and writes the metadata record. If the
     * string cannot be parsed, a warning is logged and the current time is
     * used instead.
     */
    private void writeWARCMDRecord(OutputStream recOut, MetaData md, String targetURI, String capDateString, String recId) throws IOException {
        Date capDate;
        try {
            capDate = DateUtils.getSecondsSinceEpoch(capDateString);
        }
        catch (ParseException e) {
            LOG.warning("unable to parse capture date '" + capDateString + "', using current time: " + e.getMessage());
            capDate = new Date();
        }
        this.writeWARCMDRecord(recOut, md, targetURI, capDate, recId);
    }

    /**
     * Serializes {@code md} as UTF-8 and writes it as a JSON metadata record.
     *
     * @throws IOException if serialization fails (the {@link JSONException}
     *                     is preserved as the cause) or writing fails
     */
    private void writeWARCMDRecord(OutputStream recOut, MetaData md, String targetURI, Date capDate, String recId) throws IOException {
        ByteArrayOutputStream json = new ByteArrayOutputStream();
        OutputStreamWriter writer = new OutputStreamWriter((OutputStream)json, UTF8);
        try {
            md.write(writer);
        }
        catch (JSONException e) {
            // The cause travels with the IOException; no separate stack dump needed.
            throw new IOException(e);
        }
        // Push buffered characters into the byte array before reading it.
        writer.flush();
        this.recW.writeJSONMetadataRecord(recOut, json.toByteArray(), targetURI, capDate, recId);
    }

    /**
     * Concatenates the character ranges [0,4), [5,7), [8,10), [11,13),
     * [14,16) and [17,19) of {@code input}; for a value shaped like
     * {@code yyyy-MM-ddTHH:mm:ss...} this yields {@code yyyyMMddHHmmss}.
     *
     * <p>Not called from within this class.
     *
     * @throws StringIndexOutOfBoundsException if {@code input} has fewer than
     *                                         19 characters
     */
    private static String transformWARCDate(String input) {
        StringBuilder output = new StringBuilder(14);
        output.append(input.substring(0, 4));
        output.append(input.substring(5, 7));
        output.append(input.substring(8, 10));
        output.append(input.substring(11, 13));
        output.append(input.substring(14, 16));
        output.append(input.substring(17, 19));
        return output.toString();
    }
}

