/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.hostdb;

import com.tdunning.math.stats.TDigest;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.hostdb.CrawlDatumProcessor;
import org.apache.nutch.hostdb.HostDatum;
import org.apache.nutch.hostdb.ResolverThread;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reducer that folds all records seen for one host key into a single
 * {@link HostDatum}.
 *
 * <p>For every key the reducer receives a mix of {@link CrawlDatum},
 * {@link HostDatum} and {@link FloatWritable} values (wrapped in
 * {@link NutchWritable}). It counts crawl statuses, aggregates the configured
 * string and numeric metadata fields, carries over fields of an existing
 * {@link HostDatum}, and then either writes the result or hands it to a
 * {@link ResolverThread} through the executor's queue.
 *
 * <p>Most settings are kept in static fields that are (re)assigned by every
 * call to {@link #setup}.
 */
public class UpdateHostDbReducer
extends Reducer<Text, NutchWritable, Text, HostDatum> {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    /** Most recently created resolver task (overwritten on every queued host). */
    protected ResolverThread resolverThread = null;
    /** Size of the resolver thread pool ({@code hostdb.num.resolvers.threads}). */
    protected Integer numResolverThreads = 10;
    /** Value of {@code hostdb.purge.failed.hosts.threshold}; passed on to {@link ResolverThread}. */
    protected static Integer purgeFailedHostsThreshold = -1;
    /** Recheck interval in milliseconds ({@code hostdb.recheck.interval} is configured in seconds). */
    protected static Integer recheckInterval = 86400000;
    protected static boolean checkFailed = false;
    protected static boolean checkNew = false;
    protected static boolean checkKnown = false;
    /** True when at least one of {@link #checkNew}, {@link #checkKnown}, {@link #checkFailed} is set. */
    protected static boolean checkAny = false;
    /** When true, {@link #isEligibleForCheck} accepts every host regardless of its last check time. */
    protected static boolean force = false;
    /** Minimum number of records a host needs to be emitted; {@code -1} disables the limit. */
    protected static long urlLimit = -1L;
    /** Timestamp (ms) captured once when the class is initialised. */
    protected static long now = new Date().getTime();
    protected static String[] numericFields;
    protected static String[] stringFields;
    /** Percentiles (0..100) to emit for every numeric field. */
    protected static int[] percentiles;
    /** {@link Text} versions of {@link #numericFields}, index-aligned, used as metadata keys. */
    protected static Text[] numericFieldWritables;
    /** {@link Text} versions of {@link #stringFields}, index-aligned, used as metadata keys. */
    protected static Text[] stringFieldWritables;
    /** Successfully instantiated custom processors; contains no {@code null} entries after {@link #setup}. */
    protected static CrawlDatumProcessor[] crawlDatumProcessors;
    /** Hand-off queue shared with {@link #executor}; {@link #reduce} puts resolver tasks into it directly. */
    protected BlockingQueue<Runnable> queue = new SynchronousQueue<Runnable>();
    /** Resolver pool; only created when {@link #checkAny} is true. */
    protected ThreadPoolExecutor executor = null;

    /** Per-host accumulators for the configured metadata fields. */
    private static final class FieldStatistics {
        /** field name -> (observed value -> number of occurrences). */
        final Map<String, Map<String, Long>> stringCounts = new HashMap<String, Map<String, Long>>();
        final Map<String, Float> maximums = new HashMap<String, Float>();
        final Map<String, Float> sums = new HashMap<String, Float>();
        final Map<String, Long> counts = new HashMap<String, Long>();
        final Map<String, Float> minimums = new HashMap<String, Float>();
        final Map<String, TDigest> tdigests = new HashMap<String, TDigest>();
    }

    /**
     * Reads the {@code hostdb.*} settings from the job configuration, instantiates
     * the configured {@link CrawlDatumProcessor}s and, if any check is enabled,
     * starts the resolver thread pool.
     *
     * @param context reducer context supplying the job configuration
     */
    @Override
    public void setup(Reducer<Text, NutchWritable, Text, HostDatum>.Context context) {
        Configuration conf = context.getConfiguration();
        purgeFailedHostsThreshold = conf.getInt("hostdb.purge.failed.hosts.threshold", -1);
        this.numResolverThreads = conf.getInt("hostdb.num.resolvers.threads", 10);
        recheckInterval = UpdateHostDbReducer.secondsToMillis(conf.getInt("hostdb.recheck.interval", 86400));
        checkFailed = conf.getBoolean("hostdb.check.failed", false);
        checkNew = conf.getBoolean("hostdb.check.new", false);
        checkKnown = conf.getBoolean("hostdb.check.known", false);
        checkAny = checkNew || checkKnown || checkFailed;
        force = conf.getBoolean("hostdb.force.check", false);
        urlLimit = conf.getLong("hostdb.url.limit", -1L);
        numericFields = conf.getStrings("hostdb.numeric.fields");
        stringFields = conf.getStrings("hostdb.string.fields");
        percentiles = conf.getInts("hostdb.percentiles");
        if (percentiles != null) {
            for (int percentile : percentiles) {
                // Warn once here; reduce() silently skips these values.
                if (percentile < 0 || percentile > 100) {
                    LOG.warn("Ignoring percentile {} from hostdb.percentiles: must be within 0..100", percentile);
                }
            }
        }
        String[] processorClassNames = conf.getStrings("hostdb.crawldatum.processors");
        if (processorClassNames != null) {
            crawlDatumProcessors = UpdateHostDbReducer.instantiateProcessors(processorClassNames, conf);
        }
        if (numericFields != null) {
            numericFieldWritables = UpdateHostDbReducer.toTexts(numericFields);
        }
        if (stringFields != null) {
            stringFieldWritables = UpdateHostDbReducer.toTexts(stringFields);
        }
        if (checkAny) {
            this.executor = new ThreadPoolExecutor(this.numResolverThreads, this.numResolverThreads, 5L, TimeUnit.SECONDS, this.queue);
            // Workers must already be polling the SynchronousQueue, because
            // reduce() puts tasks into the queue directly instead of calling execute().
            this.executor.prestartAllCoreThreads();
        }
    }

    /**
     * Converts a second count to milliseconds, clamping to the {@code int} range
     * instead of silently wrapping around for very large intervals.
     */
    private static int secondsToMillis(int seconds) {
        long millis = (long)seconds * 1000L;
        if (millis > Integer.MAX_VALUE || millis < Integer.MIN_VALUE) {
            int clamped = millis > 0L ? Integer.MAX_VALUE : Integer.MIN_VALUE;
            LOG.warn("hostdb.recheck.interval of {} seconds does not fit into an int of milliseconds, using {} ms", seconds, clamped);
            return clamped;
        }
        return (int)millis;
    }

    /** Wraps every name in a {@link Text} so it can be used as a metadata key. */
    private static Text[] toTexts(String[] names) {
        Text[] texts = new Text[names.length];
        for (int i = 0; i < names.length; ++i) {
            texts[i] = new Text(names[i]);
        }
        return texts;
    }

    /**
     * Instantiates every named processor through its {@code (Configuration)}
     * constructor. Classes that cannot be loaded or constructed are logged and
     * left out, so the returned array never contains {@code null}.
     */
    private static CrawlDatumProcessor[] instantiateProcessors(String[] classNames, Configuration conf) {
        CrawlDatumProcessor[] loaded = new CrawlDatumProcessor[classNames.length];
        int loadedCount = 0;
        for (String className : classNames) {
            LOG.info("Instantiating custom CrawlDatumProcessor {}", className);
            try {
                Class<? extends CrawlDatumProcessor> processorClass = Class.forName(className).asSubclass(CrawlDatumProcessor.class);
                loaded[loadedCount] = processorClass.getConstructor(Configuration.class).newInstance(conf);
                ++loadedCount;
            }
            catch (Exception e) {
                LOG.error("Unable to instantiate crawldatum processor: {} because: {}", className, e.getMessage(), e);
            }
        }
        if (loadedCount == loaded.length) {
            return loaded;
        }
        CrawlDatumProcessor[] compacted = new CrawlDatumProcessor[loadedCount];
        System.arraycopy(loaded, 0, compacted, 0, loadedCount);
        return compacted;
    }

    /**
     * Aggregates all values of one host into a new {@link HostDatum}.
     *
     * <p>After aggregation the host is dropped if {@link #urlLimit} is set and not
     * reached, queued for a {@link ResolverThread} if {@link #shouldCheck} says so
     * (in which case nothing is written here), or finalised by the custom
     * processors and written to the context.
     *
     * @param key     the host key
     * @param values  wrapped {@link CrawlDatum}, {@link HostDatum} or {@link FloatWritable} records
     * @param context reducer context used for counters and output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<NutchWritable> values, Reducer<Text, NutchWritable, Text, HostDatum>.Context context) throws IOException, InterruptedException {
        FieldStatistics stats = new FieldStatistics();
        HostDatum hostDatum = new HostDatum();
        float score = 0.0f;
        if (stringFields != null) {
            for (String field : stringFields) {
                stats.stringCounts.put(field, new HashMap<String, Long>());
            }
        }
        for (NutchWritable wrapped : values) {
            Writable value = wrapped.get();
            if (value instanceof CrawlDatum) {
                this.countCrawlDatum(key, (CrawlDatum)value, hostDatum, stats);
            } else if (value instanceof HostDatum) {
                UpdateHostDbReducer.mergeExisting(hostDatum, (HostDatum)value);
            } else if (value instanceof FloatWritable) {
                score = ((FloatWritable)value).get();
            } else {
                LOG.error("Class {} not handled", value.getClass());
            }
        }
        // A positive FloatWritable score takes precedence over a score carried over
        // from an existing HostDatum.
        if (score > 0.0f) {
            hostDatum.setScore(score);
        }
        UpdateHostDbReducer.writeStatistics(hostDatum, stats);
        if (urlLimit > -1L && hostDatum.numRecords() < urlLimit) {
            context.getCounter("UpdateHostDb", "url_limit_not_reached").increment(1L);
            return;
        }
        context.getCounter("UpdateHostDb", "total_hosts").increment(1L);
        if (this.shouldCheck(hostDatum)) {
            this.resolverThread = new ResolverThread(key.toString(), hostDatum, context, purgeFailedHostsThreshold);
            try {
                // Blocks until a resolver worker takes the task from the SynchronousQueue.
                this.queue.put(this.resolverThread);
            }
            catch (InterruptedException e) {
                LOG.error("UpdateHostDb:", (Throwable)e);
                // Keep the interrupt visible to the caller instead of swallowing it.
                Thread.currentThread().interrupt();
            }
            // Nothing is written here; presumably the ResolverThread emits the record
            // through the context it was given — confirm in ResolverThread.
            return;
        }
        if (checkAny) {
            context.getCounter("UpdateHostDb", "skipped_not_eligible").increment(1L);
            LOG.debug("UpdateHostDb: {}: skipped_not_eligible", (Object)key);
        }
        if (crawlDatumProcessors != null) {
            for (CrawlDatumProcessor processor : crawlDatumProcessors) {
                processor.finalize(hostDatum);
            }
        }
        context.write(key, hostDatum);
    }

    /**
     * Updates the status counters, connection failures and field statistics of
     * {@code hostDatum} for one crawl record and passes the record to the custom
     * processors.
     */
    private void countCrawlDatum(Text key, CrawlDatum crawlDatum, HostDatum hostDatum, FieldStatistics stats) {
        int status = crawlDatum.getStatus();
        // The literals are inlined by the compiler; judging by the setters they are
        // presumably CrawlDatum.STATUS_DB_UNFETCHED .. STATUS_DB_NOTMODIFIED — confirm
        // before replacing them with the named constants.
        switch (status) {
            case 1: {
                hostDatum.setUnfetched(hostDatum.getUnfetched() + 1L);
                break;
            }
            case 2: {
                hostDatum.setFetched(hostDatum.getFetched() + 1L);
                break;
            }
            case 3: {
                hostDatum.setGone(hostDatum.getGone() + 1L);
                break;
            }
            case 4: {
                hostDatum.setRedirTemp(hostDatum.getRedirTemp() + 1L);
                break;
            }
            case 5: {
                hostDatum.setRedirPerm(hostDatum.getRedirPerm() + 1L);
                break;
            }
            case 6: {
                hostDatum.setNotModified(hostDatum.getNotModified() + 1L);
                break;
            }
            default: {
                break;
            }
        }
        if (crawlDatum.getRetriesSinceFetch() != 0) {
            hostDatum.incConnectionFailures();
        }
        // Field statistics are only gathered for statuses 2 and 6 (the ones counted
        // as fetched / not modified above).
        if (status == 2 || status == 6) {
            if (stringFields != null) {
                UpdateHostDbReducer.collectStringFields(crawlDatum, stats);
            }
            if (numericFields != null) {
                UpdateHostDbReducer.collectNumericFields(key, crawlDatum, stats);
            }
        }
        if (crawlDatumProcessors != null) {
            for (CrawlDatumProcessor processor : crawlDatumProcessors) {
                processor.count(crawlDatum);
            }
        }
    }

    /** Counts the occurrences of each value of the configured string fields. */
    private static void collectStringFields(CrawlDatum crawlDatum, FieldStatistics stats) {
        for (int i = 0; i < stringFields.length; ++i) {
            Writable raw = crawlDatum.getMetaData().get(stringFieldWritables[i]);
            if (raw == null) {
                continue;
            }
            String observed;
            try {
                observed = raw.toString();
            }
            catch (Exception e) {
                // Skip the value rather than counting it under a null key.
                LOG.error("Unable to read metadata field {} as a string", (Object)stringFields[i], (Object)e);
                continue;
            }
            Map<String, Long> valueCounts = stats.stringCounts.get(stringFields[i]);
            Long seen = valueCounts.get(observed);
            valueCounts.put(observed, seen == null ? 1L : seen + 1L);
        }
    }

    /**
     * Feeds the configured numeric fields into the t-digest, minimum, maximum,
     * sum and count accumulators. Values that cannot be parsed as a float are
     * logged and skipped.
     */
    private static void collectNumericFields(Text key, CrawlDatum crawlDatum, FieldStatistics stats) {
        for (int i = 0; i < numericFields.length; ++i) {
            Writable raw = crawlDatum.getMetaData().get(numericFieldWritables[i]);
            if (raw == null) {
                continue;
            }
            try {
                String field = numericFields[i];
                float observed = Float.parseFloat(raw.toString());
                // The digest is updated first so that a value it rejects does not
                // reach the other accumulators.
                TDigest digest = stats.tdigests.get(field);
                if (digest == null) {
                    digest = TDigest.createDigest(100.0);
                    digest.add((double)observed);
                    stats.tdigests.put(field, digest);
                } else {
                    digest.add((double)observed);
                }
                Float minimum = stats.minimums.get(field);
                if (minimum == null || observed < minimum.floatValue()) {
                    stats.minimums.put(field, Float.valueOf(observed));
                }
                Float maximum = stats.maximums.get(field);
                if (maximum == null || observed > maximum.floatValue()) {
                    stats.maximums.put(field, Float.valueOf(observed));
                }
                Float sum = stats.sums.get(field);
                if (sum == null) {
                    stats.sums.put(field, Float.valueOf(observed));
                    stats.counts.put(field, 1L);
                } else {
                    stats.sums.put(field, Float.valueOf(sum.floatValue() + observed));
                    stats.counts.put(field, stats.counts.get(field) + 1L);
                }
            }
            catch (Exception e) {
                LOG.error("{} when processing values for {}", (Object)e.getMessage(), (Object)key);
            }
        }
    }

    /** Copies the fields of an already stored host record that carry information. */
    private static void mergeExisting(HostDatum hostDatum, HostDatum existing) {
        if (existing.hasHomepageUrl()) {
            hostDatum.setHomepageUrl(existing.getHomepageUrl());
        }
        if (!existing.isEmpty()) {
            hostDatum.setLastCheck(existing.getLastCheck());
        }
        if (existing.getDnsFailures() > 0L) {
            hostDatum.setDnsFailures(existing.getDnsFailures());
        }
        if (existing.getConnectionFailures() > 0L) {
            hostDatum.setConnectionFailures(existing.getConnectionFailures());
        }
        if (existing.hasMetaData()) {
            hostDatum.setMetaData(existing.getMetaData());
        }
        if (existing.getScore() > 0.0f) {
            hostDatum.setScore(existing.getScore());
        }
    }

    /**
     * Stores the aggregated statistics in the host metadata under the keys
     * {@code <field>.<value>}, {@code max.<field>}, {@code avg.<field>},
     * {@code pct<N>.<field>} and {@code min.<field>}.
     */
    private static void writeStatistics(HostDatum hostDatum, FieldStatistics stats) {
        for (Map.Entry<String, Map<String, Long>> fieldEntry : stats.stringCounts.entrySet()) {
            for (Map.Entry<String, Long> valueEntry : fieldEntry.getValue().entrySet()) {
                hostDatum.getMetaData().put((Writable)new Text(fieldEntry.getKey() + "." + valueEntry.getKey()), (Writable)new LongWritable(valueEntry.getValue().longValue()));
            }
        }
        for (Map.Entry<String, Float> entry : stats.maximums.entrySet()) {
            hostDatum.getMetaData().put((Writable)new Text("max." + entry.getKey()), (Writable)new FloatWritable(entry.getValue().floatValue()));
        }
        for (Map.Entry<String, Float> entry : stats.sums.entrySet()) {
            float average = entry.getValue().floatValue() / (float)stats.counts.get(entry.getKey()).longValue();
            hostDatum.getMetaData().put((Writable)new Text("avg." + entry.getKey()), (Writable)new FloatWritable(average));
        }
        if (percentiles != null) {
            for (Map.Entry<String, TDigest> entry : stats.tdigests.entrySet()) {
                for (int percentile : percentiles) {
                    // quantile() only accepts 0..1; out-of-range settings were
                    // reported in setup().
                    if (percentile < 0 || percentile > 100) {
                        continue;
                    }
                    float quantile = (float)entry.getValue().quantile((double)percentile / 100.0);
                    hostDatum.getMetaData().put((Writable)new Text("pct" + percentile + "." + entry.getKey()), (Writable)new FloatWritable(quantile));
                }
            }
        }
        for (Map.Entry<String, Float> entry : stats.minimums.entrySet()) {
            hostDatum.getMetaData().put((Writable)new Text("min." + entry.getKey()), (Writable)new FloatWritable(entry.getValue().floatValue()));
        }
    }

    /**
     * Decides whether the host should be handed to a resolver thread.
     *
     * @param datum the aggregated host record
     * @return {@code true} for empty records when {@link #checkNew} is set;
     *         otherwise the result of {@link #isEligibleForCheck} for non-empty
     *         records without DNS failures ({@link #checkKnown}) or records with
     *         DNS failures ({@link #checkFailed}); {@code false} in all other cases
     */
    protected boolean shouldCheck(HostDatum datum) {
        if (checkNew && datum.isEmpty()) {
            return true;
        }
        if (checkKnown && !datum.isEmpty() && datum.getDnsFailures() == 0L) {
            return this.isEligibleForCheck(datum);
        }
        if (checkFailed && datum.getDnsFailures() > 0L) {
            return this.isEligibleForCheck(datum);
        }
        return false;
    }

    /**
     * Returns {@code true} when {@link #force} is set or when
     * {@code lastCheck + recheckInterval * dnsFailures + 1} lies after {@link #now}.
     *
     * <p>NOTE(review): as written this accepts hosts whose computed next-check
     * time is still in the future, and a host without DNS failures only passes
     * if its last check is not older than {@link #now}. The comparison looks
     * inverted; confirm the intended rule before changing it.
     *
     * @param datum the host record to test
     * @return whether the host passes the recheck condition
     */
    protected boolean isEligibleForCheck(HostDatum datum) {
        return force || datum.getLastCheck().getTime() + ((long)recheckInterval.intValue() * datum.getDnsFailures() + 1L) > now;
    }

    /**
     * Shuts the resolver pool down and blocks until all queued resolver tasks
     * have finished. Does nothing when no pool was started.
     *
     * @param context reducer context (unused)
     */
    @Override
    public void cleanup(Reducer<Text, NutchWritable, Text, HostDatum>.Context context) {
        if (this.executor == null) {
            return;
        }
        LOG.info("UpdateHostDb: feeder finished, waiting for shutdown");
        this.executor.shutdown();
        boolean interrupted = false;
        while (!this.executor.isTerminated()) {
            LOG.info("UpdateHostDb: resolver threads waiting: {}", (Object)this.executor.getPoolSize());
            try {
                this.executor.awaitTermination(1000L, TimeUnit.MILLISECONDS);
            }
            catch (InterruptedException e) {
                // Keep waiting for the workers, but remember the interrupt; restoring
                // the flag inside the loop would turn it into a busy loop.
                interrupted = true;
                LOG.warn(StringUtils.stringifyException((Throwable)e));
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}

