/*
 * Decompiled with CFR 0.152.
 */
package org.apache.gobblin.compaction.mapreduce;

import com.google.common.base.Enums;
import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.primitives.Ints;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.commons.math3.primes.Primes;
import org.apache.gobblin.compaction.dataset.DatasetHelper;
import org.apache.gobblin.compaction.mapreduce.avro.AvroKeyCompactorOutputFormat;
import org.apache.gobblin.compaction.mapreduce.avro.AvroKeyDedupReducer;
import org.apache.gobblin.compaction.mapreduce.avro.AvroKeyMapper;
import org.apache.gobblin.compaction.mapreduce.avro.AvroKeyRecursiveCombineFileInputFormat;
import org.apache.gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner;
import org.apache.gobblin.compaction.parser.CompactionPathParser;
import org.apache.gobblin.compaction.verify.InputRecordCountHelper;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.dataset.FileSystemDataset;
import org.apache.gobblin.util.AvroUtils;
import org.apache.gobblin.util.FileListUtils;
import org.apache.gobblin.util.HadoopUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CompactionAvroJobConfigurator {
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(CompactionAvroJobConfigurator.class);
    /** Job state handed to the constructor; every configuration property in this class is read from it. */
    protected final State state;
    /** File system resolved in the constructor from the {@code source.filebased.fs.uri} property. */
    protected final FileSystem fs;
    /** The job most recently built by {@link #createJob(FileSystemDataset)}; unset until then. */
    protected Job configuredJob;
    /** Value of {@code compaction.should.deduplicate} (defaults to {@code true}). */
    protected final boolean shouldDeduplicate;
    /** Temporary MR output directory; stays {@code null} until input and output paths are configured. */
    protected Path mrOutputPath = null;
    /** Set to {@code true} at the end of {@link #createJob(FileSystemDataset)}. */
    protected boolean isJobCreated = false;
    /** Input directories added to the job; stays {@code null} until input and output paths are configured. */
    protected Collection<Path> mapReduceInputPaths = null;
    /**
     * Record count computed by {@code InputRecordCountHelper} in {@link #getGranularInputPaths(Path)};
     * only updated when {@code compaction.rename.source.dir.enabled} is {@code true}, otherwise stays 0.
     */
    private long fileNameRecordCount = 0L;

    public CompactionAvroJobConfigurator(State state) throws IOException {
        this.state = state;
        this.fs = this.getFileSystem(state);
        this.shouldDeduplicate = state.getPropAsBoolean("compaction.should.deduplicate", true);
    }

    /**
     * Resolves the dedup key option from the {@code compaction.job.dedup.key} property.
     *
     * <p>The configured value is upper-cased and matched against the names of
     * {@link MRCompactorAvroKeyDedupJobRunner.DedupKeyOption}.
     *
     * @return the configured option, or {@link MRCompactorAvroKeyDedupJobRunner#DEFAULT_DEDUP_KEY_OPTION}
     *         when the property is absent or does not name a known option
     */
    private MRCompactorAvroKeyDedupJobRunner.DedupKeyOption getDedupKeyOption() {
        if (!this.state.contains("compaction.job.dedup.key")) {
            return MRCompactorAvroKeyDedupJobRunner.DEFAULT_DEDUP_KEY_OPTION;
        }
        String configured = this.state.getProp("compaction.job.dedup.key");
        // Locale.ROOT keeps the upper-casing independent of the JVM's default locale.
        Optional<MRCompactorAvroKeyDedupJobRunner.DedupKeyOption> option = Enums.getIfPresent(
                MRCompactorAvroKeyDedupJobRunner.DedupKeyOption.class,
                configured.toUpperCase(java.util.Locale.ROOT));
        if (!option.isPresent()) {
            // Surface the misconfiguration instead of silently falling back.
            log.warn("Unrecognized value '{}' for compaction.job.dedup.key, using default {}",
                    configured, MRCompactorAvroKeyDedupJobRunner.DEFAULT_DEDUP_KEY_OPTION);
            return MRCompactorAvroKeyDedupJobRunner.DEFAULT_DEDUP_KEY_OPTION;
        }
        return option.get();
    }

    /**
     * Chooses the Avro schema used as the map output key when deduplicating.
     *
     * <p>Selection depends on the dedup key option:
     * <ul>
     *   <li>{@code ALL}: the topic schema with uncomparable fields removed;</li>
     *   <li>{@code KEY}: the key attributes of the topic schema;</li>
     *   <li>otherwise: the schema file named by {@code compaction.job.avro.key.schema.loc}. The key
     *       attributes of the topic schema are used instead when the property is missing, the file
     *       cannot be parsed, or the parsed schema is not valid against the topic schema.</li>
     * </ul>
     *
     * @param job the job being configured (not used by this method)
     * @param topicSchema schema of the records being compacted
     * @return the key schema to deduplicate on
     * @throws IOException declared for callers; parse failures of the key schema file are handled here
     */
    private Schema getKeySchema(Job job, Schema topicSchema) throws IOException {
        MRCompactorAvroKeyDedupJobRunner.DedupKeyOption dedupKeyOption = this.getDedupKeyOption();
        if (dedupKeyOption == MRCompactorAvroKeyDedupJobRunner.DedupKeyOption.ALL) {
            log.info("Using all attributes in the schema (except Map, Array and Enum fields) for compaction");
            return AvroUtils.removeUncomparableFields(topicSchema).get();
        }
        if (dedupKeyOption == MRCompactorAvroKeyDedupJobRunner.DedupKeyOption.KEY) {
            log.info("Using key attributes in the schema for compaction");
            return this.getKeyAttributeSchema(topicSchema);
        }
        if (!this.state.contains("compaction.job.avro.key.schema.loc")) {
            log.info("Property compaction.job.avro.key.schema.loc not provided. Using key attributes in the schema for compaction");
            return this.getKeyAttributeSchema(topicSchema);
        }

        Path keySchemaFile = new Path(this.state.getProp("compaction.job.avro.key.schema.loc"));
        log.info("Using attributes specified in schema file " + keySchemaFile + " for compaction");
        Schema keySchema;
        try {
            keySchema = AvroUtils.parseSchemaFromFile(keySchemaFile, this.fs);
        }
        catch (IOException e) {
            // Keep the cause in the log so a broken schema file can be diagnosed.
            log.error("Failed to parse avro schema from " + keySchemaFile + ", using key attributes in the schema for compaction", e);
            return this.getKeyAttributeSchema(topicSchema);
        }
        if (!MRCompactorAvroKeyDedupJobRunner.isKeySchemaValid(keySchema, topicSchema)) {
            log.warn(String.format("Key schema %s is not compatible with record schema %s. ", keySchema, topicSchema) + "Using key attributes in the schema for compaction");
            return this.getKeyAttributeSchema(topicSchema);
        }
        return keySchema;
    }

    /** Returns the key attributes of {@code topicSchema} with uncomparable fields removed. */
    private Schema getKeyAttributeSchema(Schema topicSchema) {
        return AvroUtils.removeUncomparableFields(MRCompactorAvroKeyDedupJobRunner.getKeySchema(topicSchema)).get();
    }

    /**
     * Registers the Avro schemas on the job.
     *
     * <p>The newest schema found in the source becomes the map output value schema and the output
     * key schema. It is also the input key schema unless
     * {@code compaction.job.avro.single.input.schema} is {@code false}. The map output key schema is
     * the dedup key schema when deduplicating, and the newest schema otherwise.
     *
     * @param job the job to configure
     * @throws IOException if a schema cannot be resolved
     */
    private void configureSchema(Job job) throws IOException {
        Schema latest = MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource(job, this.fs);

        boolean singleInputSchema = this.state.getPropAsBoolean("compaction.job.avro.single.input.schema", true);
        if (singleInputSchema) {
            AvroJob.setInputKeySchema(job, latest);
        }

        Schema mapOutputKeySchema;
        if (this.shouldDeduplicate) {
            mapOutputKeySchema = this.getKeySchema(job, latest);
        } else {
            mapOutputKeySchema = latest;
        }
        AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);
        AvroJob.setMapOutputValueSchema(job, latest);
        AvroJob.setOutputKeySchema(job, latest);
    }

    /**
     * Sets the map side of the job: input format, mapper class, and map output key/value classes.
     *
     * @param job the job to configure
     */
    protected void configureMapper(Job job) {
        // Map output types.
        job.setMapOutputKeyClass(AvroKey.class);
        job.setMapOutputValueClass(AvroValue.class);
        // Input handling and mapper implementation.
        job.setInputFormatClass(AvroKeyRecursiveCombineFileInputFormat.class);
        job.setMapperClass(AvroKeyMapper.class);
    }

    /**
     * Sets the reduce side of the job: output format, reducer class, output key/value classes, and
     * finally the number of reduce tasks.
     *
     * @param job the job to configure
     * @throws IOException if the reducer count cannot be computed
     */
    protected void configureReducer(Job job) throws IOException {
        // Final output types.
        job.setOutputKeyClass(AvroKey.class);
        job.setOutputValueClass(NullWritable.class);
        // Reducer implementation and output handling.
        job.setReducerClass(AvroKeyDedupReducer.class);
        job.setOutputFormatClass(AvroKeyCompactorOutputFormat.class);
        setNumberOfReducers(job);
    }

    /**
     * Derives the number of reduce tasks from the total input size.
     *
     * <p>The count is {@code inputSize / targetFileSize + 1}, capped at
     * {@code compaction.job.max.num.reducers} (default 900). Unless
     * {@code compaction.job.use.prime.reducers} is {@code false}, any count other than 1 is then
     * replaced by the next prime at or above it, so the final value may exceed the configured cap.
     *
     * @param job the job whose reduce task count is set
     * @throws IOException if the size of an input path cannot be read
     * @throws IllegalArgumentException if {@code compaction.job.target.output.file.size} is not positive
     */
    protected void setNumberOfReducers(Job job) throws IOException {
        long targetFileSize = this.state.getPropAsLong("compaction.job.target.output.file.size", 0x20000000L);
        if (targetFileSize <= 0L) {
            // Fail with a clear message instead of a bare division-by-zero or a negative count.
            throw new IllegalArgumentException(
                    "compaction.job.target.output.file.size must be positive, but was " + targetFileSize);
        }

        long inputSize = 0L;
        for (Path inputPath : this.mapReduceInputPaths) {
            inputSize += this.fs.getContentSummary(inputPath).getLength();
        }

        int maxNumReducers = this.state.getPropAsInt("compaction.job.max.num.reducers", 900);
        // Clamp while still in long arithmetic: a huge size ratio is capped at the maximum instead
        // of failing the int conversion, and "+ 1" can no longer overflow.
        long fullFiles = inputSize / targetFileSize;
        int numReducers = fullFiles >= (long) maxNumReducers ? maxNumReducers : (int) fullFiles + 1;

        boolean usePrimeReducers = this.state.getPropAsBoolean("compaction.job.use.prime.reducers", true);
        if (usePrimeReducers && numReducers != 1) {
            numReducers = Primes.nextPrime(numReducers);
        }
        job.setNumReduceTasks(numReducers);
    }

    /**
     * Joins the given names into one path, left to right, each name resolved as a child of the
     * path built so far.
     *
     * @param names path segments, in order
     * @return the combined path, or {@code null} when {@code names} is {@code null} or empty
     */
    private Path concatPaths(String ... names) {
        boolean nothingToJoin = names == null || names.length == 0;
        if (nothingToJoin) {
            return null;
        }
        Path joined = null;
        for (String name : names) {
            Path segment = new Path(name);
            // The first segment starts the path; later ones are appended beneath it.
            joined = joined == null ? segment : new Path(joined, segment);
        }
        return joined;
    }

    /**
     * Adds the dataset's input directories to the job and points the job at a fresh temporary
     * output directory.
     *
     * <p>The output directory is built from {@code compaction.tmp.job.dir} plus the dataset name,
     * destination sub-directory and time string parsed from the dataset. It is deleted recursively
     * before being set as the job output path.
     *
     * @param job the job to configure
     * @param dataset the dataset being compacted
     * @throws IOException if listing inputs or cleaning the output directory fails
     */
    protected void configureInputAndOutputPaths(Job job, FileSystemDataset dataset) throws IOException {
        Collection<Path> inputDirs = this.getGranularInputPaths(dataset.datasetRoot());
        this.mapReduceInputPaths = inputDirs;
        for (Path inputDir : inputDirs) {
            FileInputFormat.addInputPath(job, inputDir);
        }

        String tmpJobDir = this.state.getProp("compaction.tmp.job.dir");
        CompactionPathParser.CompactionParserResult parsed = new CompactionPathParser(this.state).parse(dataset);
        Path outputDir = this.concatPaths(tmpJobDir, parsed.getDatasetName(), parsed.getDstSubDir(), parsed.getTimeString());
        this.mrOutputPath = outputDir;

        log.info("Cleaning temporary MR output directory: " + outputDir);
        this.fs.delete(outputDir, true);
        FileOutputFormat.setOutputPath(job, outputDir);
    }

    /**
     * Builds and fully configures the compaction MapReduce job for a dataset.
     *
     * <p>Output compression is switched on unless one of the compression properties is already
     * set. Delegation-token cancellation on job completion is switched off unless configured.
     * When deduplication is disabled the job is made map-only by setting zero reduce tasks.
     *
     * @param dataset the dataset to compact
     * @return the configured job, also retrievable through {@link #getConfiguredJob()}
     * @throws IOException if any configuration step fails
     */
    public Job createJob(FileSystemDataset dataset) throws IOException {
        Configuration conf = HadoopUtils.getConfFromState(this.state);

        boolean compressionUnset = conf.get("mapreduce.output.fileoutputformat.compress") == null
                && conf.get("mapred.output.compress") == null;
        if (compressionUnset) {
            conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
        }
        boolean tokenCancellationUnset = conf.get("mapreduce.job.complete.cancel.delegation.tokens") == null;
        if (tokenCancellationUnset) {
            conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
        }
        addJars(conf);

        Job mrJob = Job.getInstance(conf);
        mrJob.setJobName("Gobblin MR Compaction");
        configureInputAndOutputPaths(mrJob, dataset);
        configureMapper(mrJob);
        configureReducer(mrJob);
        if (!this.shouldDeduplicate) {
            // No reduce phase when nothing has to be deduplicated.
            mrJob.setNumReduceTasks(0);
        }
        configureSchema(mrJob);

        this.isJobCreated = true;
        this.configuredJob = mrJob;
        return mrJob;
    }

    /**
     * Adds every entry of the directory named by {@code compaction.jars} to the job classpath
     * through the distributed cache. Does nothing when the property is not set.
     *
     * @param conf the configuration the classpath entries are recorded in
     * @throws IOException if the directory cannot be listed or an entry cannot be added
     */
    private void addJars(Configuration conf) throws IOException {
        if (this.state.contains("compaction.jars")) {
            Path jarDir = new Path(this.state.getProp("compaction.jars"));
            FileStatus[] entries = this.fs.listStatus(jarDir);
            for (FileStatus entry : entries) {
                DistributedCache.addFileToClassPath(entry.getPath(), conf, this.fs);
            }
        }
    }

    /**
     * Obtains the file system for the URI in {@code source.filebased.fs.uri}, defaulting to
     * {@code file:///} when the property is not set.
     *
     * @param state job state that supplies the URI and the Hadoop configuration
     * @return the file system for the configured URI
     * @throws IOException if the file system cannot be obtained
     */
    private FileSystem getFileSystem(State state) throws IOException {
        Configuration hadoopConf = HadoopUtils.getConfFromState(state);
        URI fsUri = URI.create(state.getProp("source.filebased.fs.uri", "file:///"));
        return FileSystem.get(fsUri, hadoopConf);
    }

    /**
     * Collects the distinct parent directories of all files found recursively under {@code path}.
     *
     * <p>When {@code compaction.rename.source.dir.enabled} is {@code true}, directories whose path
     * ends with {@code _COMPLETE} are left out of the result. In that mode all directories,
     * including the left-out ones, are passed to {@link InputRecordCountHelper} to refresh the
     * file-name based record count.
     *
     * @param path root directory to scan
     * @return the directories to use as MapReduce input
     * @throws IOException if listing files or computing the record count fails
     */
    protected Collection<Path> getGranularInputPaths(Path path) throws IOException {
        boolean appendDelta = this.state.getPropAsBoolean("compaction.rename.source.dir.enabled", false);
        HashSet<Path> uncompacted = Sets.newHashSet();
        HashSet<Path> total = Sets.newHashSet();
        for (FileStatus fileStatus : FileListUtils.listFilesRecursively(this.fs, path)) {
            Path parentDir = fileStatus.getPath().getParent();
            if (!appendDelta) {
                uncompacted.add(parentDir);
                continue;
            }
            if (!parentDir.toString().endsWith("_COMPLETE")) {
                uncompacted.add(parentDir);
            }
            total.add(parentDir);
        }
        if (appendDelta) {
            this.fileNameRecordCount = new InputRecordCountHelper(this.state).calculateRecordCount(total);
            log.info("{} has total input record count (based on file name) {}", path, this.fileNameRecordCount);
        }
        return uncompacted;
    }

    /**
     * Fetches the task completion events of a job, batch by batch, until the job returns no more.
     *
     * <p>This is best effort: if fetching a batch fails, the failure is logged and the events
     * collected so far are returned, so the result may be incomplete.
     *
     * @param completedJob the job to query
     * @return the completion events retrieved, possibly empty
     */
    private static List<org.apache.hadoop.mapreduce.TaskCompletionEvent> getAllTaskCompletionEvent(Job completedJob) {
        List<org.apache.hadoop.mapreduce.TaskCompletionEvent> completionEvents = new ArrayList<>();
        try {
            // Fully qualified: the file imports two classes with the simple name TaskCompletionEvent.
            org.apache.hadoop.mapreduce.TaskCompletionEvent[] bunchOfEvents;
            // The number of events already collected is the offset of the next batch.
            while ((bunchOfEvents = completedJob.getTaskCompletionEvents(completionEvents.size())) != null && bunchOfEvents.length != 0) {
                completionEvents.addAll(Arrays.asList(bunchOfEvents));
            }
        }
        catch (IOException e) {
            log.warn("Failed to fetch task completion events after " + completionEvents.size()
                    + " event(s); continuing with the events collected so far", e);
        }
        return completionEvents;
    }

    /**
     * Returns the completion events of a job whose status is anything other than {@code SUCCEEDED}.
     *
     * @param completedJob the job to query
     * @return the completion events of task attempts that did not succeed
     */
    private static List<org.apache.hadoop.mapreduce.TaskCompletionEvent> getUnsuccessfulTaskCompletionEvent(Job completedJob) {
        // Status is fully qualified so it is the enum type that getStatus() returns; the simple
        // name TaskCompletionEvent is ambiguous in this file.
        return CompactionAvroJobConfigurator.getAllTaskCompletionEvent(completedJob).stream()
                .filter(te -> te.getStatus() != org.apache.hadoop.mapreduce.TaskCompletionEvent.Status.SUCCEEDED)
                .collect(Collectors.toList());
    }

    /**
     * Tells whether a path contains the attempt id of any of the given events as a directory
     * component, i.e. surrounded by {@code /} on both sides.
     *
     * @param path the path to inspect
     * @param failedEvents completion events of unsuccessful task attempts
     * @return {@code true} if the path contains the attempt id of at least one of the events
     */
    private static boolean isFailedPath(Path path, List<org.apache.hadoop.mapreduce.TaskCompletionEvent> failedEvents) {
        for (org.apache.hadoop.mapreduce.TaskCompletionEvent failed : failedEvents) {
            String attemptDir = "/" + failed.getTaskAttemptId().toString() + "/";
            if (path.toString().contains(attemptDir)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Deletes the {@code avro} files under {@code tmpPath} that were written by unsuccessful task
     * attempts and returns the remaining files.
     *
     * <p>A file counts as bad when its path contains the attempt id of an unsuccessful task
     * completion event of {@code job}. Bad files are never returned, even if deleting them fails.
     *
     * @param job the finished job whose task completion events are inspected
     * @param tmpPath directory holding the job output
     * @param fs file system used to list and delete files
     * @return the files not attributed to an unsuccessful task attempt
     * @throws IOException if listing or deleting files fails
     */
    public static List<Path> removeFailedPaths(Job job, Path tmpPath, FileSystem fs) throws IOException {
        List<org.apache.hadoop.mapreduce.TaskCompletionEvent> failedEvents = CompactionAvroJobConfigurator.getUnsuccessfulTaskCompletionEvent(job);
        List<Path> allFilePaths = DatasetHelper.getApplicableFilePaths(fs, tmpPath, Lists.newArrayList("avro"));
        List<Path> goodPaths = new ArrayList<>();
        for (Path filePath : allFilePaths) {
            if (!CompactionAvroJobConfigurator.isFailedPath(filePath, failedEvents)) {
                goodPaths.add(filePath);
                continue;
            }
            // Only report the file as deleted when the file system confirms it.
            if (fs.delete(filePath, false)) {
                log.error("{} is a bad path so it was deleted", filePath);
            } else {
                log.error("{} is a bad path but it could not be deleted", filePath);
            }
        }
        return goodPaths;
    }

    /** @return the file system resolved when this configurator was constructed */
    public FileSystem getFs() {
        return fs;
    }

    /** @return the job stored by {@code createJob}, or {@code null} if no job has been stored yet */
    public Job getConfiguredJob() {
        return configuredJob;
    }

    /** @return the dedup flag read from the job state at construction time */
    public boolean isShouldDeduplicate() {
        return shouldDeduplicate;
    }

    /** @return the temporary MR output path, or {@code null} if it has not been set yet */
    public Path getMrOutputPath() {
        return mrOutputPath;
    }

    /** @return the job-created flag, which is {@code false} until {@code createJob} completes */
    public boolean isJobCreated() {
        return isJobCreated;
    }

    /** @return the input directories of the job, or {@code null} if they have not been set yet */
    public Collection<Path> getMapReduceInputPaths() {
        return mapReduceInputPaths;
    }

    /** @return the file-name based input record count; 0 unless it has been computed */
    public long getFileNameRecordCount() {
        return fileNameRecordCount;
    }
}

