/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.fetcher;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.crawl.SignatureFactory;
import org.apache.nutch.fetcher.FetchItem;
import org.apache.nutch.fetcher.FetchItemQueue;
import org.apache.nutch.fetcher.FetchItemQueues;
import org.apache.nutch.fetcher.FetchNode;
import org.apache.nutch.fetcher.FetchNodeDb;
import org.apache.nutch.fetcher.FetcherThreadEvent;
import org.apache.nutch.fetcher.FetcherThreadPublisher;
import org.apache.nutch.fetcher.QueueFeeder;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.net.URLExemptionFilters;
import org.apache.nutch.net.URLFilterException;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.net.protocols.ProtocolLogUtil;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.ParseOutputFormat;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseSegment;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.ParseText;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.ProtocolStatus;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.service.NutchServer;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FetcherThread
extends Thread {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    // --- Collaborators built from the job configuration in the constructor ---
    private Configuration conf;
    private URLFilters urlFilters;
    private URLExemptionFilters urlExemptionFilters;
    private ScoringFilters scfilters;
    private ParseUtil parseUtil;
    private URLNormalizers normalizers;
    private ProtocolFactory protocolFactory;

    // --- Politeness settings ---
    // "fetcher.max.crawl.delay" multiplied by 1000; run() skips a URL whose robots.txt
    // Crawl-Delay exceeds it, unless this value is negative.
    private long maxCrawlDelay;
    // "fetcher.min.crawl.delay" (default: "fetcher.server.delay") multiplied by 1000;
    // shorter robots.txt Crawl-Delay values are raised to it in run().
    private long minCrawlDelay;
    // Value of "fetcher.queue.mode" after FetchItemQueues.checkQueueMode().
    private String queueMode;

    // --- Redirect handling ---
    // "http.redirect.max": when > 0, handleRedirect() follows redirects immediately.
    private int maxRedirect;
    // "http.redirect.max.exceeded.skip": when false, run() emits a datum for the last
    // redirect target after the redirect limit is exceeded.
    private boolean maxRedirectExceededSkip = false;
    // Representative URL of the item being fetched; updated by handleRedirect().
    private String reprUrl;
    // Per-item redirect state, reset in run() before each fetch item.
    private boolean redirecting;
    private int redirectCount;

    // --- Link filtering ("db.ignore.*" properties) ---
    private boolean ignoreInternalLinks;
    private boolean ignoreExternalLinks;
    private boolean ignoreAlsoRedirects;
    private String ignoreExternalLinksMode;

    // --- Outlink limits; negative configured values are mapped to Integer.MAX_VALUE ---
    private final int maxOutlinks;
    private final int maxOutlinkLength;
    // "db.fetch.interval.default".
    private final int interval;
    // "fetcher.follow.outlinks.*" properties.
    private int maxOutlinkDepth;
    private int maxOutlinkDepthNumLinks;
    private boolean outlinksIgnoreExternal;
    // Only assigned when parsing is enabled and the matching "parse.filter.urls" /
    // "parse.normalize.urls" property is true; otherwise left null.
    URLFilters urlFiltersForOutlinks;
    URLNormalizers normalizersForOutlinks;
    // "parser.skip.truncated".
    private boolean skipTruncated;
    // NOTE(review): presumably read/written by isHalted()/setHalted(), which are
    // outside this view - confirm.
    private boolean halted = false;

    // --- State shared with the owning fetcher, handed in through the constructor ---
    private AtomicInteger activeThreads;
    private FetchItemQueues fetchQueues;
    private QueueFeeder feeder;
    private AtomicInteger spinWaiting;
    private AtomicLong lastRequestStart;
    private AtomicInteger errors;
    private String segmentName;
    private boolean parsing;
    private Mapper.Context context;
    private boolean storingContent;
    // "fetcher.signature": compute a signature even when the content is not parsed.
    private boolean signatureWithoutParsing;
    private AtomicInteger pages;
    private AtomicLong bytes;

    // Non-null only when "fetcher.store.robotstxt" is true and content is stored;
    // filled by the protocol's getRobotRules() call and flushed in run().
    private List<Content> robotsTxtContent = null;
    // "http.robots.503.defer.visits.delay" / ".retries"; left at 0 when
    // "http.robots.503.defer.visits" is false.
    private long robotsDeferVisitsDelay;
    private int robotsDeferVisitsRetries;

    // --- Reporting ---
    // Re-created for every loop iteration in run() while reportToNutchServer is true.
    private FetchNode fetchNode;
    private boolean reportToNutchServer;
    // Only instantiated when "fetcher.publisher" is true (activatePublisher).
    private FetcherThreadPublisher publisher;
    private boolean activatePublisher;
    private ProtocolLogUtil logUtil = new ProtocolLogUtil();

    /**
     * Creates a daemon fetcher thread named {@code "FetcherThread"} and reads all
     * fetcher-, redirect- and outlink-related settings from the configuration.
     *
     * @param conf             job configuration all settings are read from
     * @param activeThreads    shared counter of running fetcher threads
     * @param fetchQueues      queues this thread takes its fetch items from
     * @param feeder           feeder thread filling {@code fetchQueues}
     * @param spinWaiting      shared counter of threads currently waiting for work
     * @param lastRequestStart shared timestamp of the most recent request start
     * @param context          mapper context used for output and counters
     * @param errors           shared counter of failed fetches
     * @param segmentName      name of the segment written into content metadata
     * @param parsing          whether fetched content is parsed by this thread
     * @param storingContent   whether fetched content is written to the output
     * @param pages            shared counter of fetched pages
     * @param bytes            shared counter of fetched bytes
     */
    public FetcherThread(Configuration conf, AtomicInteger activeThreads, FetchItemQueues fetchQueues, QueueFeeder feeder, AtomicInteger spinWaiting, AtomicLong lastRequestStart, Mapper.Context context, AtomicInteger errors, String segmentName, boolean parsing, boolean storingContent, AtomicInteger pages, AtomicLong bytes) {
        this.setDaemon(true);
        this.setName("FetcherThread");
        this.conf = conf;
        this.urlFilters = new URLFilters(conf);
        this.urlExemptionFilters = new URLExemptionFilters(conf);
        this.scfilters = new ScoringFilters(conf);
        this.parseUtil = new ParseUtil(conf);
        this.skipTruncated = conf.getBoolean("parser.skip.truncated", true);
        this.signatureWithoutParsing = conf.getBoolean("fetcher.signature", false);
        this.protocolFactory = new ProtocolFactory(conf);
        this.normalizers = new URLNormalizers(conf, "fetcher");
        // Multiply as long: an int product silently overflows for configured values
        // above Integer.MAX_VALUE / 1000 and could even turn negative, which run()
        // interprets as "no upper limit".
        this.maxCrawlDelay = conf.getInt("fetcher.max.crawl.delay", 30) * 1000L;
        float crawlDelay = conf.getFloat("fetcher.server.delay", 1.0f);
        this.minCrawlDelay = (long)(conf.getFloat("fetcher.min.crawl.delay", crawlDelay) * 1000.0f);
        this.activeThreads = activeThreads;
        this.fetchQueues = fetchQueues;
        this.feeder = feeder;
        this.spinWaiting = spinWaiting;
        this.lastRequestStart = lastRequestStart;
        this.context = context;
        this.errors = errors;
        this.segmentName = segmentName;
        this.parsing = parsing;
        this.storingContent = storingContent;
        this.pages = pages;
        this.bytes = bytes;
        this.logUtil.setConf(conf);
        // Outlink filters/normalizers are only needed when this thread parses.
        if (parsing) {
            if (conf.getBoolean("parse.filter.urls", true)) {
                this.urlFiltersForOutlinks = this.urlFilters;
            }
            if (conf.getBoolean("parse.normalize.urls", true)) {
                this.normalizersForOutlinks = new URLNormalizers(conf, "outlink");
            }
        }
        if (conf.getBoolean("http.robots.503.defer.visits", true)) {
            this.robotsDeferVisitsDelay = conf.getLong("http.robots.503.defer.visits.delay", 300000L);
            this.robotsDeferVisitsRetries = conf.getInt("http.robots.503.defer.visits.retries", 3);
        }
        this.activatePublisher = conf.getBoolean("fetcher.publisher", false);
        if (this.activatePublisher) {
            this.publisher = new FetcherThreadPublisher(conf);
        }
        this.queueMode = conf.get("fetcher.queue.mode", "byHost");
        this.queueMode = FetchItemQueues.checkQueueMode(this.queueMode);
        // NOTE: the constructor runs on the creating thread, so the id logged here
        // (and in the warning below) is that thread's id, not this thread's.
        LOG.info("{} {} Using queue mode : {}", new Object[]{this.getName(), Thread.currentThread().getId(), this.queueMode});
        this.maxRedirect = conf.getInt("http.redirect.max", 3);
        this.maxRedirectExceededSkip = conf.getBoolean("http.redirect.max.exceeded.skip", false);
        // Negative limits mean "unlimited".
        int maxOutlinksPerPage = conf.getInt("db.max.outlinks.per.page", 100);
        this.maxOutlinks = maxOutlinksPerPage < 0 ? Integer.MAX_VALUE : maxOutlinksPerPage;
        int maxOutlinkL = conf.getInt("db.max.outlink.length", 4096);
        this.maxOutlinkLength = maxOutlinkL < 0 ? Integer.MAX_VALUE : maxOutlinkL;
        this.interval = conf.getInt("db.fetch.interval.default", 2592000);
        this.ignoreInternalLinks = conf.getBoolean("db.ignore.internal.links", false);
        this.ignoreExternalLinks = conf.getBoolean("db.ignore.external.links", false);
        this.ignoreAlsoRedirects = conf.getBoolean("db.ignore.also.redirects", true);
        this.ignoreExternalLinksMode = conf.get("db.ignore.external.links.mode", "byHost");
        this.maxOutlinkDepth = conf.getInt("fetcher.follow.outlinks.depth", -1);
        this.outlinksIgnoreExternal = conf.getBoolean("fetcher.follow.outlinks.ignore.external", false);
        this.maxOutlinkDepthNumLinks = conf.getInt("fetcher.follow.outlinks.num.links", 4);
        if (conf.getBoolean("fetcher.store.robotstxt", false)) {
            if (storingContent) {
                this.robotsTxtContent = new LinkedList<Content>();
            } else {
                LOG.warn("{} {} Ignoring fetcher.store.robotstxt because not storing content (fetcher.store.content)!", (Object)this.getName(), (Object)Thread.currentThread().getId());
            }
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     * WARNING - Removed back jump from a try to a catch block - possible behaviour change.
     * Unable to fully structure code
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    /**
     * Main loop of the fetcher thread.
     *
     * <p>Registers itself in {@code activeThreads}, then repeatedly takes a
     * {@link FetchItem} from {@code fetchQueues}. For each item it obtains the
     * robots.txt rules from the protocol, honours defer-visits / disallow /
     * Crawl-Delay, fetches the URL, writes the result through {@code output(...)}
     * and keeps looping while {@code redirecting} is set and
     * {@code redirectCount <= maxRedirect}. When no item is available it sleeps
     * 500 ms as long as the feeder is alive or the queues are non-empty, and
     * otherwise finishes.
     *
     * <p>NOTE(review): this body is raw decompiler output. It contains
     * {@code ** GOTO} pseudo-statements, duplicate labels, two
     * {@code catch (Throwable)} clauses and locals without declarations, so it is
     * not compilable Java and the control flow shown is only an approximation.
     * The repeated "finish item / decrement activeThreads / log" sequences
     * presumably stem from a single {@code finally} block - confirm against the
     * upstream source before changing anything here.
     */
    @Override
    public void run() {
        this.activeThreads.incrementAndGet();
        fit = null;
        try {
            // Per-page reporting is only enabled when parsing and a NutchServer is running.
            if (this.parsing && NutchServer.getInstance().isRunning()) {
                this.reportToNutchServer = true;
            }
lbl7:
            // 6 sources

            while (true) {
                this.fetchNode = this.reportToNutchServer != false ? new FetchNode() : null;
                if (this.isHalted()) {
                    FetcherThread.LOG.debug("{} set to halted", (Object)this.getName());
                    fit = null;
                    // Inlined cleanup copy; fit was just set to null so this branch cannot fire here.
                    if (fit != null) {
                        this.fetchQueues.finishFetchItem(fit);
                    }
                    this.activeThreads.decrementAndGet();
                    // NOTE(review): no return/break was emitted here; presumably the halted
                    // path leaves the method at this point - confirm.
                }
                ** GOTO lbl-1000
                break;
            }
        }
        catch (Throwable e) {
            // Anything escaping the fetch loop ends this thread after cleanup.
            FetcherThread.LOG.error("fetcher caught:", e);
            if (fit != null) {
                this.fetchQueues.finishFetchItem(fit);
            }
            this.activeThreads.decrementAndGet();
            FetcherThread.LOG.info("{} {} -finishing thread {}, activeThreads={}", new Object[]{this.getName(), Thread.currentThread().getId(), this.getName(), this.activeThreads});
            return;
        }
        catch (Throwable var14_24) {
            // Same cleanup as above, followed by a rethrow (decompiler rendering, see method comment).
            if (fit != null) {
                this.fetchQueues.finishFetchItem(fit);
            }
            this.activeThreads.decrementAndGet();
            FetcherThread.LOG.info("{} {} -finishing thread {}, activeThreads={}", new Object[]{this.getName(), Thread.currentThread().getId(), this.getName(), this.activeThreads});
            throw var14_24;
        }
        FetcherThread.LOG.info("{} {} -finishing thread {}, activeThreads={}", new Object[]{this.getName(), Thread.currentThread().getId(), this.getName(), this.activeThreads});
        return;
lbl-1000:
        // 1 sources

        {
            // Take the next item; decide between spin-waiting and terminating when there is none.
            block43: {
                fit = this.fetchQueues.getFetchItem();
                if (fit != null) ** GOTO lbl-1000
                // Nothing to fetch: stop only if the feeder is dead AND the queues are empty.
                if (!this.feeder.isAlive() && this.fetchQueues.getTotalSize() <= 0) break block43;
                FetcherThread.LOG.debug("{} spin-waiting ...", (Object)this.getName());
                this.spinWaiting.incrementAndGet();
                try {
                    Thread.sleep(500L);
                }
                catch (Exception var2_3) {
                    // empty catch block
                }
                this.spinWaiting.decrementAndGet();
                ** GOTO lbl7
            }
            FetcherThread.LOG.info("{} {} has no more work available", (Object)this.getName(), (Object)Thread.currentThread().getId());
            if (fit != null) {
                this.fetchQueues.finishFetchItem(fit);
            }
            this.activeThreads.decrementAndGet();
        }
        FetcherThread.LOG.info("{} {} -finishing thread {}, activeThreads={}", new Object[]{this.getName(), Thread.currentThread().getId(), this.getName(), this.activeThreads});
        return;
lbl-1000:
        // 1 sources

        {
            // A fetch item is available: process it, following redirects in the do/while below.
            this.lastRequestStart.set(System.currentTimeMillis());
            // Start from the representative URL stored in the datum metadata, else the item URL.
            reprUrlWritable = (Text)fit.datum.getMetaData().get((Object)Nutch.WRITABLE_REPR_URL_KEY);
            if (reprUrlWritable == null) {
                this.setReprUrl(fit.url.toString());
            } else {
                this.setReprUrl(reprUrlWritable.toString());
            }
            try {
                this.redirecting = false;
                this.redirectCount = 0;
                if (this.activatePublisher) {
                    startEvent = new FetcherThreadEvent(FetcherThreadEvent.PublishEventType.START, fit.getUrl().toString());
                    this.publisher.publish(startEvent, this.conf);
                }
                do {
                    FetcherThread.LOG.info("{} {} fetching {} (queue crawl delay={}ms)", new Object[]{this.getName(), Thread.currentThread().getId(), fit.url, this.fetchQueues.getFetchItemQueue((String)fit.queueID).crawlDelay});
                    FetcherThread.LOG.debug("redirectCount={}", (Object)this.redirectCount);
                    this.redirecting = false;
                    protocol = this.protocolFactory.getProtocol(fit.u);
                    rules = protocol.getRobotRules(fit.url, fit.datum, this.robotsTxtContent);
                    // Flush any robots.txt content collected by getRobotRules() to the output.
                    if (this.robotsTxtContent != null) {
                        this.outputRobotsTxt(this.robotsTxtContent);
                        this.robotsTxtContent.clear();
                    }
                    if (rules.isDeferVisits()) {
                        FetcherThread.LOG.info("Defer visits for queue {} : {}", (Object)fit.queueID, (Object)fit.url);
                        // Re-queue the item for a later attempt unless the time limit has been hit.
                        if (this.fetchQueues.timelimitExceeded()) {
                            this.fetchQueues.finishFetchItem(fit, true);
                        } else {
                            this.fetchQueues.addFetchItem(fit);
                        }
                        killedURLs = this.fetchQueues.checkExceptionThreshold(fit.getQueueID(), this.robotsDeferVisitsRetries + 1, this.robotsDeferVisitsDelay);
                        if (killedURLs == 0) continue;
                        this.context.getCounter("FetcherStatus", "robots_defer_visits_dropped").increment((long)killedURLs);
                        continue;
                    }
                    if (!rules.isAllowed(fit.url.toString())) {
                        this.fetchQueues.finishFetchItem(fit, true);
                        FetcherThread.LOG.info("Denied by robots.txt: {}", (Object)fit.url);
                        // 37: inlined constant, presumably CrawlDatum.STATUS_FETCH_GONE - confirm.
                        this.output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, 37);
                        this.context.getCounter("FetcherStatus", "robots_denied").increment(1L);
                        continue;
                    }
                    if (rules.getCrawlDelay() > 0L) {
                        // A negative maxCrawlDelay disables the "too long" check.
                        if (rules.getCrawlDelay() > this.maxCrawlDelay && this.maxCrawlDelay >= 0L) {
                            this.fetchQueues.finishFetchItem(fit, true);
                            FetcherThread.LOG.info("Crawl-Delay for {} too long ({} ms), skipping", (Object)fit.url, (Object)rules.getCrawlDelay());
                            this.output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, 37);
                            this.context.getCounter("FetcherStatus", "robots_denied_maxcrawldelay").increment(1L);
                            continue;
                        }
                        // Apply the robots.txt delay to the queue, but never below minCrawlDelay.
                        fiq = this.fetchQueues.getFetchItemQueue(fit.queueID);
                        crawlDelay = rules.getCrawlDelay();
                        if (crawlDelay < this.minCrawlDelay) {
                            FetcherThread.LOG.info("Crawl-Delay for {} too short ({} ms), adjusting to {} ms", new Object[]{fit.url, rules.getCrawlDelay(), this.minCrawlDelay});
                            crawlDelay = this.minCrawlDelay;
                        }
                        fiq.crawlDelay = crawlDelay;
                        FetcherThread.LOG.debug("Crawl delay for queue: {} is set to {} as per robots.txt. url: {}", new Object[]{fit.queueID, fiq.crawlDelay, fit.url});
                    }
                    // The actual fetch.
                    output = protocol.getProtocolOutput(fit.url, fit.datum);
                    status = output.getStatus();
                    content = output.getContent();
                    pstatus = null;
                    this.fetchQueues.finishFetchItem(fit);
                    if (this.fetchNode != null) {
                        this.fetchNode.setStatus(status.getCode());
                        this.fetchNode.setFetchTime(System.currentTimeMillis());
                        this.fetchNode.setUrl(fit.url);
                    }
                    if (this.activatePublisher) {
                        endEvent = new FetcherThreadEvent(FetcherThreadEvent.PublishEventType.END, fit.getUrl().toString());
                        endEvent.addEventData("status", status.getName());
                        this.publisher.publish(endEvent, this.conf);
                    }
                    this.context.getCounter("FetcherStatus", status.getName()).increment(1L);
                    // The case labels and the int passed to output(...) are compiler-inlined
                    // constants; the names given below are presumed ProtocolStatus /
                    // CrawlDatum constants - confirm against those classes.
                    switch (status.getCode()) {
                        // presumably ProtocolStatus.SUCCESS; 33 ~ CrawlDatum.STATUS_FETCH_SUCCESS
                        case 1: {
                            pstatus = this.output(fit.url, fit.datum, content, status, 33, fit.outlinkDepth);
                            this.updateStatus(content.getContent().length);
                            // Parse-level redirect (minor code 100, presumably ParseStatus.SUCCESS_REDIRECT);
                            // a refresh time below 5 is passed to handleRedirect() as temp = true.
                            if (pstatus == null || !pstatus.isSuccess() || pstatus.getMinorCode() != 100 || (redirUrl = this.handleRedirect(fit, newUrl = pstatus.getMessage(), (refreshTime = Integer.parseInt(pstatus.getArgs()[1])) < 5, "content")) == null) break;
                            fit = this.queueRedirect(redirUrl, fit);
                            break;
                        }
                        // presumably ProtocolStatus.MOVED (12) / TEMP_MOVED (13)
                        case 12: 
                        case 13: {
                            if (status.getCode() == 12) {
                                // 36 ~ CrawlDatum.STATUS_FETCH_REDIR_PERM (presumed)
                                code = 36;
                                temp = false;
                            } else {
                                // 35 ~ CrawlDatum.STATUS_FETCH_REDIR_TEMP (presumed)
                                code = 35;
                                temp = true;
                            }
                            this.output(fit.url, fit.datum, content, status, code);
                            newUrl = status.getMessage();
                            redirUrl = this.handleRedirect(fit, newUrl, temp, "protocol");
                            if (redirUrl != null) {
                                fit = this.queueRedirect(redirUrl, fit);
                                break;
                            }
                            this.redirecting = false;
                            break;
                        }
                        // presumably ProtocolStatus.EXCEPTION
                        case 16: {
                            this.logError(fit.url, status.getMessage());
                            killedURLs = this.fetchQueues.checkExceptionThreshold(fit.getQueueID());
                            if (killedURLs != 0) {
                                this.context.getCounter("FetcherStatus", "AboveExceptionThresholdInQueue").increment((long)killedURLs);
                            }
                            // No break: falls through to case 15 so the datum is written with status 34.
                        }
                        // presumably ProtocolStatus.RETRY; 34 ~ CrawlDatum.STATUS_FETCH_RETRY
                        case 15: {
                            this.output(fit.url, fit.datum, null, status, 34);
                            break;
                        }
                        // presumably GONE / NOTFOUND / ACCESS_DENIED / ROBOTS_DENIED; 37 ~ STATUS_FETCH_GONE
                        case 11: 
                        case 14: 
                        case 17: 
                        case 18: {
                            this.output(fit.url, fit.datum, null, status, 37);
                            break;
                        }
                        // presumably ProtocolStatus.NOTMODIFIED; 38 ~ CrawlDatum.STATUS_FETCH_NOTMODIFIED
                        case 21: {
                            this.output(fit.url, fit.datum, null, status, 38);
                            break;
                        }
                        default: {
                            FetcherThread.LOG.warn("{} {} Unknown ProtocolStatus: {}", new Object[]{this.getName(), Thread.currentThread().getId(), status.getCode()});
                            this.output(fit.url, fit.datum, null, status, 34);
                        }
                    }
                    // Only continue below when a redirect was accepted but the redirect limit is exceeded.
                    if (!this.redirecting || this.redirectCount <= this.maxRedirect) continue;
                    this.fetchQueues.finishFetchItem(fit);
                    this.context.getCounter("FetcherStatus", "redirect_count_exceeded").increment(1L);
                    FetcherThread.LOG.info("{} {} - redirect count exceeded {} ({})", new Object[]{this.getName(), Thread.currentThread().getId(), fit.url, this.maxRedirectExceededSkip != false ? "skipped" : "linked"});
                    if (this.maxRedirectExceededSkip) continue;
                    // Not skipping: record the last redirect target with status 67
                    // (presumably CrawlDatum.STATUS_LINKED) instead of fetching it.
                    newUrl = new Text(status.getMessage());
                    newDatum = this.createRedirDatum(newUrl, fit, (byte)67);
                    this.output(newUrl, newDatum, null, null, 67);
                } while (this.redirecting && this.redirectCount <= this.maxRedirect);
            }
            catch (Throwable t) {
                // Any failure while handling this item: release it, log, and write it with status 34.
                this.fetchQueues.finishFetchItem(fit);
                // Full stack trace when debug logging is on or logUtil does not ask for short logging;
                // otherwise only the exception class name.
                message = FetcherThread.LOG.isDebugEnabled() != false ? StringUtils.stringifyException((Throwable)t) : (this.logUtil.logShort(t) != false ? t.getClass().getName() : StringUtils.stringifyException((Throwable)t));
                this.logError(fit.url, message);
                this.output(fit.url, fit.datum, null, ProtocolStatus.STATUS_FAILED, 34);
                ** continue;
            }
        }
    }

    /**
     * Decides what to do with a redirect target found while fetching {@code fit}.
     *
     * <p>The target is dropped (returns {@code null}) when it is longer than
     * {@code maxOutlinkLength}, is rejected by the URL filters, equals the
     * source URL, or violates the internal/external link restrictions (only
     * checked when {@code ignoreAlsoRedirects} is set). Otherwise the
     * representative URL is updated and either
     * <ul>
     * <li>the target is returned for immediate fetching ({@code maxRedirect > 0};
     * {@code redirecting} is set and {@code redirectCount} incremented), or</li>
     * <li>a datum with status 67 is written for the target and {@code null} is
     * returned ({@code maxRedirect <= 0}).</li>
     * </ul>
     *
     * @param fit       item whose fetch produced the redirect
     * @param newUrl    raw redirect target
     * @param temp      whether the redirect is temporary (passed to {@code URLUtil.chooseRepr})
     * @param redirType label used in log messages
     * @return the normalized target to fetch now, or {@code null} if nothing is to be fetched now
     */
    private Text handleRedirect(FetchItem fit, String newUrl, boolean temp, String redirType) throws MalformedURLException, URLFilterException, InterruptedException {
        if (newUrl.length() > this.maxOutlinkLength) {
            return null;
        }

        final String normalized = this.normalizers.normalize(newUrl, "fetcher");
        final String target = this.urlFilters.filter(normalized);
        final String sourceUrl = fit.url.toString();

        if (target == null) {
            LOG.debug(" - {} redirect skipped: {}", redirType, "filtered");
            return null;
        }
        if (target.equals(sourceUrl)) {
            LOG.debug(" - {} redirect skipped: {}", redirType, "to same url");
            return null;
        }

        final boolean restrictLinks = this.ignoreExternalLinks || this.ignoreInternalLinks;
        if (this.ignoreAlsoRedirects && restrictLinks) {
            try {
                final URL sourceLocation = fit.u;
                final URL targetLocation = new URL(target);

                if (this.ignoreExternalLinks) {
                    final boolean byDomain = "bydomain".equalsIgnoreCase(this.ignoreExternalLinksMode);
                    final String sourceScope = byDomain
                            ? URLUtil.getDomainName(sourceLocation).toLowerCase()
                            : sourceLocation.getHost().toLowerCase();
                    final String targetScope = byDomain
                            ? URLUtil.getDomainName(targetLocation).toLowerCase()
                            : targetLocation.getHost().toLowerCase();
                    if (!sourceScope.equals(targetScope)) {
                        LOG.debug(" - ignoring redirect {} from {} to {} because external links are ignored", redirType, sourceUrl, target);
                        return null;
                    }
                }

                if (this.ignoreInternalLinks) {
                    final String sourceHost = sourceLocation.getHost().toLowerCase();
                    final String targetHost = targetLocation.getHost().toLowerCase();
                    if (sourceHost.equals(targetHost)) {
                        LOG.debug(" - ignoring redirect {} from {} to {} because internal links are ignored", redirType, sourceUrl, target);
                        return null;
                    }
                }
            }
            catch (MalformedURLException e) {
                // An unparsable target is treated like a rejected one.
                return null;
            }
        }

        this.reprUrl = URLUtil.chooseRepr(this.reprUrl, target, temp);
        final Text redirect = new Text(target);

        if (this.maxRedirect <= 0) {
            // Redirects are not followed by the fetcher: record the target for later.
            final CrawlDatum linkedDatum = this.createRedirDatum(redirect, fit, (byte)67);
            this.output(redirect, linkedDatum, null, null, 67);
            LOG.debug(" - {} redirect to {} (fetching later)", redirType, redirect);
            return null;
        }

        this.redirecting = true;
        this.redirectCount++;
        LOG.debug(" - {} redirect to {} (fetching now)", redirType, redirect);
        return redirect;
    }

    /**
     * Builds the datum for a redirect target. Fetch interval, score and all
     * metadata are copied from the datum of {@code fit}; the scoring filters
     * then get the chance to set an initial score, and the current
     * representative URL (if any) is stored in the metadata.
     *
     * @param redirUrl redirect target the datum is created for
     * @param fit      item whose datum serves as template
     * @param status   status to assign to the new datum
     * @return the newly created datum
     */
    private CrawlDatum createRedirDatum(Text redirUrl, FetchItem fit, byte status) {
        final CrawlDatum redirDatum = new CrawlDatum(status, fit.datum.getFetchInterval(), fit.datum.getScore());
        redirDatum.getMetaData().putAll((Map)fit.datum.getMetaData());

        try {
            this.scfilters.initialScore(redirUrl, redirDatum);
        }
        catch (ScoringFilterException scoringFailure) {
            // Scoring failures are logged only; the datum is still returned.
            LOG.error("Scoring filtering failed for {}: ", redirUrl, scoringFailure);
        }

        final String representative = this.reprUrl;
        if (representative != null) {
            redirDatum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY, new Text(representative));
        }
        return redirDatum;
    }

    /**
     * Turns a redirect target into a fetch item registered as "in progress" in
     * its queue.
     *
     * <p>Returns {@code null} and clears {@code redirecting} when the target was
     * queued recently, the fetcher time limit is exceeded, or no fetch item
     * could be created; each case increments its own "FetcherStatus" counter.
     *
     * @param redirUrl redirect target
     * @param fit      item that was redirected
     * @return the fetch item for the redirect target, or {@code null} if it is not followed
     */
    private FetchItem queueRedirect(Text redirUrl, FetchItem fit) throws ScoringFilterException {
        // Short-circuit keeps the original order: the time limit is only
        // consulted when the target is not a recent duplicate.
        final boolean duplicate = this.fetchQueues.redirectIsQueuedRecently(redirUrl);
        if (duplicate || this.fetchQueues.timelimitExceeded()) {
            this.redirecting = false;
            if (duplicate) {
                this.context.getCounter("FetcherStatus", "redirect_deduplicated").increment(1L);
                LOG.debug(" - ignoring redirect from {} to {} as duplicate", fit.url, redirUrl);
            } else {
                this.context.getCounter("FetcherStatus", "hitByTimeLimit").increment(1L);
                LOG.debug(" - ignoring redirect from {} to {} - timelimit reached", fit.url, redirUrl);
            }
            return null;
        }

        final CrawlDatum redirDatum = this.createRedirDatum(redirUrl, fit, (byte)1);
        final FetchItem redirItem = FetchItem.create(redirUrl, redirDatum, this.queueMode);
        if (redirItem == null) {
            this.redirecting = false;
            this.context.getCounter("FetcherStatus", "FetchItem.notCreated.redirect").increment(1L);
            return null;
        }

        final FetchItemQueue targetQueue = this.fetchQueues.getFetchItemQueue(redirItem.queueID);
        targetQueue.addInProgressFetchItem(redirItem);
        return redirItem;
    }

    /**
     * Logs a failed fetch at INFO level and increments the shared error counter.
     *
     * @param url     URL whose fetch failed
     * @param message failure description to log
     */
    private void logError(Text url, String message) {
        final String threadName = this.getName();
        final long threadId = Thread.currentThread().getId();
        LOG.info("{} {} fetch of {} failed with: {}", threadName, threadId, url, message);
        this.errors.incrementAndGet();
    }

    /**
     * Convenience overload of the six-argument {@code output} that passes an
     * outlink depth of 0.
     *
     * @return whatever the six-argument {@code output} returns
     */
    private ParseStatus output(Text key, CrawlDatum datum, Content content, ProtocolStatus pstatus, int status) throws InterruptedException {
        final int initialOutlinkDepth = 0;
        final ParseStatus result = this.output(key, datum, content, pstatus, status, initialOutlinkDepth);
        return result;
    }

    private ParseStatus output(Text key, CrawlDatum datum, Content content, ProtocolStatus pstatus, int status, int outlinkDepth) throws InterruptedException {
        Parse p;
        ParseResult parseResult;
        block25: {
            datum.setStatus(status);
            datum.setFetchTime(System.currentTimeMillis());
            if (pstatus != null) {
                datum.getMetaData().put((Writable)Nutch.WRITABLE_PROTO_STATUS_KEY, (Writable)pstatus);
            }
            parseResult = null;
            if (content != null) {
                Metadata metadata = content.getMetadata();
                if (content.getContentType() != null) {
                    datum.getMetaData().put((Writable)new Text("Content-Type"), (Writable)new Text(content.getContentType()));
                }
                metadata.set("nutch.segment.name", this.segmentName);
                try {
                    this.scfilters.passScoreBeforeParsing(key, datum, content);
                }
                catch (Exception e) {
                    LOG.warn("{} {} Couldn't pass score, url {} ({})", new Object[]{this.getName(), Thread.currentThread().getId(), key, e});
                }
                if (status == 33) {
                    if (!(!this.parsing || this.skipTruncated && ParseSegment.isTruncated(content))) {
                        try {
                            parseResult = this.parseUtil.parse(content);
                        }
                        catch (Exception e) {
                            LOG.warn("{} {} Error parsing: {}: {}", new Object[]{this.getName(), Thread.currentThread().getId(), key, StringUtils.stringifyException((Throwable)e)});
                        }
                    }
                    if (parseResult == null && (this.parsing || this.signatureWithoutParsing)) {
                        byte[] signature = SignatureFactory.getSignature(this.conf).calculate(content, new ParseStatus().getEmptyParse(this.conf));
                        datum.setSignature(signature);
                    }
                }
                content.getMetadata().add("_fst_", Integer.toString(status));
            }
            try {
                this.context.write((Object)key, (Object)new NutchWritable((Writable)datum));
                if (content != null && this.storingContent) {
                    this.context.write((Object)key, (Object)new NutchWritable(content));
                }
                if (parseResult == null) break block25;
                for (Map.Entry entry : parseResult) {
                    Text url = (Text)entry.getKey();
                    Parse parse = (Parse)entry.getValue();
                    ParseStatus parseStatus = parse.getData().getStatus();
                    ParseData parseData = parse.getData();
                    if (!parseStatus.isSuccess()) {
                        LOG.warn("{} {} Error parsing: {}: {}", new Object[]{this.getName(), Thread.currentThread().getId(), key, parseStatus});
                        parse = parseStatus.getEmptyParse(this.conf);
                    }
                    byte[] signature = SignatureFactory.getSignature(this.conf).calculate(content, parse);
                    parseData.getContentMeta().set("nutch.segment.name", this.segmentName);
                    parseData.getContentMeta().set("nutch.content.digest", StringUtil.toHexString(signature));
                    parseData.getContentMeta().set("_ftk_", Long.toString(datum.getFetchTime()));
                    if (url.equals((Object)key)) {
                        datum.setSignature(signature);
                    }
                    try {
                        this.scfilters.passScoreAfterParsing(url, content, parse);
                    }
                    catch (Exception e) {
                        LOG.warn("{} {} Couldn't pass score, url {} ({})", new Object[]{this.getName(), Thread.currentThread().getId(), key, e});
                    }
                    String origin = null;
                    Outlink[] links = parseData.getOutlinks();
                    int outlinksToStore = Math.min(this.maxOutlinks, links.length);
                    if (this.ignoreExternalLinks || this.ignoreInternalLinks) {
                        URL originURL = new URL(url.toString());
                        origin = "bydomain".equalsIgnoreCase(this.ignoreExternalLinksMode) ? URLUtil.getDomainName(originURL).toLowerCase() : originURL.getHost().toLowerCase();
                    }
                    if (this.fetchNode != null) {
                        this.fetchNode.setOutlinks(links);
                        this.fetchNode.setTitle(parseData.getTitle());
                        FetchNodeDb.getInstance().put(this.fetchNode.getUrl().toString(), this.fetchNode);
                    }
                    int validCount = 0;
                    ArrayList<Outlink> outlinkList = new ArrayList<Outlink>(outlinksToStore);
                    HashSet<String> outlinks = new HashSet<String>(outlinksToStore);
                    for (int i = 0; i < links.length && validCount < outlinksToStore; ++i) {
                        String toUrl = links[i].getToUrl();
                        if (toUrl.length() > this.maxOutlinkLength || (toUrl = ParseOutputFormat.filterNormalize(url.toString(), toUrl, origin, this.ignoreInternalLinks, this.ignoreExternalLinks, this.ignoreExternalLinksMode, this.urlFiltersForOutlinks, this.urlExemptionFilters, this.normalizersForOutlinks)) == null) continue;
                        ++validCount;
                        links[i].setUrl(toUrl);
                        outlinkList.add(links[i]);
                        outlinks.add(toUrl);
                    }
                    if (this.activatePublisher) {
                        FetcherThreadEvent reportEvent = new FetcherThreadEvent(FetcherThreadEvent.PublishEventType.REPORT, url.toString());
                        reportEvent.addOutlinksToEventData(outlinkList);
                        reportEvent.addEventData("title", parseData.getTitle());
                        reportEvent.addEventData("content-type", parseData.getContentMeta().get("content-type"));
                        reportEvent.addEventData("score", Float.valueOf(datum.getScore()));
                        reportEvent.addEventData("fetchTime", datum.getFetchTime());
                        reportEvent.addEventData("content-language", parseData.getContentMeta().get("content-language"));
                        this.publisher.publish(reportEvent, this.conf);
                    }
                    if (this.maxOutlinkDepth > 0 && outlinkDepth < this.maxOutlinkDepth && !this.fetchQueues.timelimitExceeded()) {
                        FetchItem ft = FetchItem.create(url, null, this.queueMode);
                        FetchItemQueue queue = this.fetchQueues.getFetchItemQueue(ft.queueID);
                        queue.alreadyFetched.add(url.toString().hashCode());
                        this.context.getCounter("FetcherOutlinks", "outlinks_detected").increment((long)outlinks.size());
                        int outlinkCounter = 0;
                        Iterator iter = outlinks.iterator();
                        while (iter.hasNext() && outlinkCounter < this.maxOutlinkDepthNumLinks) {
                            int urlHashCode;
                            String followUrl = (String)iter.next();
                            if (this.outlinksIgnoreExternal && !URLUtil.getHost(url.toString()).equals(URLUtil.getHost(followUrl)) || queue.alreadyFetched.contains(urlHashCode = followUrl.hashCode())) continue;
                            queue.alreadyFetched.add(urlHashCode);
                            FetchItem fit = FetchItem.create(new Text(followUrl), new CrawlDatum(67, this.interval), this.queueMode, outlinkDepth + 1);
                            this.context.getCounter("FetcherOutlinks", "outlinks_following").increment(1L);
                            this.fetchQueues.addFetchItem(fit);
                            ++outlinkCounter;
                        }
                    }
                    parseData.setOutlinks(outlinkList.toArray(new Outlink[outlinkList.size()]));
                    this.context.write((Object)url, (Object)new NutchWritable(new ParseImpl(new ParseText(parse.getText()), parseData, parse.isCanonical())));
                }
            }
            catch (IOException e) {
                LOG.error("fetcher caught:", (Throwable)e);
            }
        }
        if (parseResult != null && !parseResult.isEmpty() && (p = parseResult.get(content.getUrl())) != null) {
            this.context.getCounter("ParserStatus", ParseStatus.majorCodes[p.getData().getStatus().getMajorCode()]).increment(1L);
            return p.getData().getStatus();
        }
        return null;
    }

    /**
     * Writes every robots.txt document in the given list to the job context,
     * using the document's URL as the output key.
     *
     * <p>An {@link IOException} raised while writing one entry is logged and the
     * loop continues with the next entry.
     *
     * @param robotsTxtContent robots.txt contents to emit
     * @throws InterruptedException not caught here; propagated to the caller
     */
    private void outputRobotsTxt(List<Content> robotsTxtContent) throws InterruptedException {
        Iterator<Content> pending = robotsTxtContent.iterator();
        while (pending.hasNext()) {
            Content entry = pending.next();
            LOG.debug("Fetched and stored robots.txt {}", (Object)entry.getUrl());
            try {
                Text outKey = new Text(entry.getUrl());
                NutchWritable outValue = new NutchWritable(entry);
                this.context.write((Object)outKey, (Object)outValue);
            }
            catch (IOException writeFailure) {
                // Best effort: a failed write must not abort the remaining entries.
                LOG.error("Fetcher failed to store the robots.txt:", (Throwable)writeFailure);
            }
        }
    }

    /**
     * Records one more fetched page and adds its size to the running byte total.
     *
     * @param bytesInPage number of bytes to add to the byte counter
     * @throws IOException declared by the signature; no statement in this body
     *         visibly raises it
     */
    private void updateStatus(int bytesInPage) throws IOException {
        // Page counter first, then the byte counter.
        pages.incrementAndGet();
        bytes.addAndGet(bytesInPage);
    }

    /**
     * Updates the halted flag while holding this object's monitor.
     *
     * @param halted new value of the flag
     */
    public void setHalted(boolean halted) {
        synchronized (this) {
            this.halted = halted;
        }
    }

    /**
     * Reads the halted flag while holding this object's monitor.
     *
     * @return the current value of the halted flag
     */
    public boolean isHalted() {
        synchronized (this) {
            return halted;
        }
    }

    /**
     * Returns the representative URL currently stored on this instance.
     *
     * @return the stored value of {@code reprUrl}; may be whatever was last
     *         assigned, including {@code null}
     */
    public String getReprUrl() {
        return reprUrl;
    }

    /**
     * Stores the given string as this instance's representative URL.
     *
     * @param urlString value to assign to {@code reprUrl}
     */
    private void setReprUrl(String urlString) {
        reprUrl = urlString;
    }
}

