/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tez.analyzer.plugins;

import com.google.common.collect.Lists;
import java.util.LinkedList;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.tez.analyzer.Analyzer;
import org.apache.tez.analyzer.CSVResult;
import org.apache.tez.analyzer.plugins.TezAnalyzerBase;
import org.apache.tez.common.Preconditions;
import org.apache.tez.common.counters.TaskCounter;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.history.parser.datamodel.DagInfo;
import org.apache.tez.history.parser.datamodel.TaskAttemptInfo;
import org.apache.tez.history.parser.datamodel.VertexInfo;

/**
 * Analyzer that walks every task attempt of every vertex in a DAG and inspects the
 * per-source values of the {@code REDUCE_INPUT_GROUPS}, {@code REDUCE_INPUT_RECORDS} and
 * {@code SHUFFLE_BYTES} counters. Attempts matching one of three heuristics are appended
 * to a {@link CSVResult} together with a short observation:
 * <ul>
 *   <li>group skew: large shuffle input with a low groups/records ratio;</li>
 *   <li>record skew: one attempt received most of the vertex's reduce input records;</li>
 *   <li>parallelism: large shuffle input with a groups/records ratio between the
 *       configured minimum and maximum.</li>
 * </ul>
 *
 * <p>Thresholds are read once, in the constructor, from the supplied {@link Configuration}.
 */
public class SkewAnalyzer
extends TezAnalyzerBase
implements Analyzer {
    /** Config key: shuffle-bytes threshold (per attempt, per source) above which checks apply. */
    private static final String SHUFFLE_BYTES_PER_ATTEMPT_PER_SOURCE = "tez.skew-analyzer.shuffle.bytes.per.source";
    /** Default shuffle-bytes threshold: 900 MiB (943718400 bytes). */
    private static final long SHUFFLE_BYTES_PER_ATTEMPT_PER_SOURCE_DEFAULT = 900L * 1024 * 1024;
    /** Config key: lower bound for the groups/records ratio. */
    private static final String ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO = "tez.skew-analyzer.shuffle.key.group.min.ratio";
    private static final float ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO_DEFAULT = 0.2f;
    /** Config key: upper bound for the groups/records (and attempt/vertex records) ratio. */
    private static final String ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO = "tez.skew-analyzer.shuffle.key.group.max.ratio";
    private static final float ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO_DEFAULT = 0.4f;
    /**
     * Share of a vertex's reduce input records that a single attempt must exceed to be
     * reported as record skew (matches the "> 60%" wording of the observation).
     */
    private static final double RECORD_SKEW_VERTEX_SHARE = 0.6;
    /** Column names of the CSV result; {@link #addRecord} emits values in this order. */
    private static final String[] HEADERS = new String[]{"vertexName", "taskAttemptId", "counterGroup", "node", "REDUCE_INPUT_GROUPS", "REDUCE_INPUT_RECORDS", "ratio", "SHUFFLE_BYTES", "timeTaken", "observation"};

    private final CSVResult csvResult = new CSVResult(HEADERS);
    private final Configuration config;
    private final float minRatio;
    private final float maxRatio;
    private final long maxShuffleBytesPerSource;

    /**
     * Creates the analyzer and reads its thresholds from {@code config}, falling back to
     * the built-in defaults for keys that are not set.
     *
     * @param config configuration holding the optional threshold overrides
     */
    public SkewAnalyzer(Configuration config) {
        this.config = config;
        this.maxRatio = config.getFloat(ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO, ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO_DEFAULT);
        this.minRatio = config.getFloat(ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO, ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO_DEFAULT);
        this.maxShuffleBytesPerSource = config.getLong(SHUFFLE_BYTES_PER_ATTEMPT_PER_SOURCE, SHUFFLE_BYTES_PER_ATTEMPT_PER_SOURCE_DEFAULT);
    }

    /**
     * Runs all skew checks against the given DAG and accumulates findings in the CSV result.
     *
     * @param dagInfo the DAG to analyze; a {@code null} value is rejected through
     *                {@code Preconditions.checkArgument}
     */
    @Override
    public void analyze(DagInfo dagInfo) throws TezException {
        Preconditions.checkArgument(dagInfo != null, "DAG can't be null");
        this.analyzeReducers(dagInfo);
    }

    /** Applies the three per-attempt checks to every task attempt of every vertex. */
    private void analyzeReducers(DagInfo dagInfo) {
        for (VertexInfo vertexInfo : dagInfo.getVertices()) {
            for (TaskAttemptInfo attemptInfo : vertexInfo.getTaskAttempts()) {
                this.analyzeGroupSkewPerSource(attemptInfo);
                this.analyzeRecordSkewPerSource(attemptInfo);
                this.analyzeForParallelism(attemptInfo);
            }
        }
    }

    /**
     * Reports sources whose shuffle bytes exceed the configured threshold while the
     * groups/records ratio is below the configured minimum.
     */
    private void analyzeGroupSkewPerSource(TaskAttemptInfo attemptInfo) {
        Map<String, TezCounter> reduceInputGroups = attemptInfo.getCounter(TaskCounter.REDUCE_INPUT_GROUPS.toString());
        Map<String, TezCounter> reduceInputRecords = attemptInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
        Map<String, TezCounter> shuffleBytes = attemptInfo.getCounter(TaskCounter.SHUFFLE_BYTES.toString());
        for (Map.Entry<String, TezCounter> entry : reduceInputGroups.entrySet()) {
            String counterGroup = entry.getKey();
            // The group named after TaskCounter itself is not a per-source entry
            // (presumably the aggregate - confirm against the history parser).
            if (counterGroup.equals(TaskCounter.class.getName())) {
                continue;
            }
            long inputGroupsCount = entry.getValue().getValue();
            long inputRecordsCount = counterValue(reduceInputRecords, counterGroup);
            if (inputRecordsCount == 0L) {
                // No records: the ratio would be NaN/Infinity and can never be below minRatio.
                continue;
            }
            long shuffleBytesPerSource = counterValue(shuffleBytes, counterGroup);
            float ratio = (float) inputGroupsCount / (float) inputRecordsCount;
            if (shuffleBytesPerSource > this.maxShuffleBytesPerSource && ratio < this.minRatio) {
                this.addRecord(attemptInfo, counterGroup, inputGroupsCount, inputRecordsCount, ratio, shuffleBytesPerSource,
                        "Please check partitioning. Otherwise consider increasing memLimit");
            }
        }
    }

    /**
     * Reports sources for which this attempt received more than the configured maximum
     * ratio, and more than 60%, of the vertex-level reduce input records. Only vertices
     * with more than one task are considered.
     */
    private void analyzeRecordSkewPerSource(TaskAttemptInfo attemptInfo) {
        VertexInfo vertexInfo = attemptInfo.getTaskInfo().getVertexInfo();
        Map<String, TezCounter> vertexLevelReduceInputRecords = vertexInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
        int vertexNumTasks = vertexInfo.getNumTasks();
        Map<String, TezCounter> reduceInputGroups = attemptInfo.getCounter(TaskCounter.REDUCE_INPUT_GROUPS.toString());
        Map<String, TezCounter> reduceInputRecords = attemptInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
        Map<String, TezCounter> shuffleBytes = attemptInfo.getCounter(TaskCounter.SHUFFLE_BYTES.toString());
        for (Map.Entry<String, TezCounter> entry : reduceInputGroups.entrySet()) {
            String counterGroup = entry.getKey();
            if (counterGroup.equals(TaskCounter.class.getName())) {
                continue;
            }
            long inputGroupsCount = entry.getValue().getValue();
            long inputRecordsCount = counterValue(reduceInputRecords, counterGroup);
            long shuffleBytesPerSource = counterValue(shuffleBytes, counterGroup);
            long vertexLevelInputRecordsCount = counterValue(vertexLevelReduceInputRecords, counterGroup);
            if (vertexLevelInputRecordsCount == 0L) {
                // Without a vertex-level total the share is undefined; dividing would yield
                // Infinity and wrongly flag the attempt as skewed.
                continue;
            }
            float ratio = (float) inputRecordsCount / (float) vertexLevelInputRecordsCount;
            boolean dominatesVertex = (double) inputRecordsCount > (double) vertexLevelInputRecordsCount * RECORD_SKEW_VERTEX_SHARE;
            if (vertexNumTasks > 1 && ratio > this.maxRatio && dominatesVertex) {
                this.addRecord(attemptInfo, counterGroup, inputGroupsCount, inputRecordsCount, ratio, shuffleBytesPerSource,
                        "Some task attempts are getting > 60% of reduce input records. Consider adjusting parallelism & check partition logic");
            }
        }
    }

    /**
     * Reports sources whose shuffle bytes exceed the configured threshold while the
     * groups/records ratio lies strictly between the configured minimum and maximum.
     */
    private void analyzeForParallelism(TaskAttemptInfo attemptInfo) {
        Map<String, TezCounter> reduceInputGroups = attemptInfo.getCounter(TaskCounter.REDUCE_INPUT_GROUPS.toString());
        Map<String, TezCounter> reduceInputRecords = attemptInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
        Map<String, TezCounter> shuffleBytes = attemptInfo.getCounter(TaskCounter.SHUFFLE_BYTES.toString());
        for (Map.Entry<String, TezCounter> entry : reduceInputGroups.entrySet()) {
            String counterGroup = entry.getKey();
            if (counterGroup.equals(TaskCounter.class.getName())) {
                continue;
            }
            long inputGroupsCount = entry.getValue().getValue();
            long inputRecordsCount = counterValue(reduceInputRecords, counterGroup);
            if (inputRecordsCount == 0L) {
                // No records: the ratio would be NaN/Infinity and can never be below maxRatio.
                continue;
            }
            long shuffleBytesPerSource = counterValue(shuffleBytes, counterGroup);
            float ratio = (float) inputGroupsCount / (float) inputRecordsCount;
            // Use the configured threshold (not the hard-coded default) so this check
            // agrees with analyzeGroupSkewPerSource.
            if (shuffleBytesPerSource > this.maxShuffleBytesPerSource && ratio > this.minRatio && ratio < this.maxRatio) {
                this.addRecord(attemptInfo, counterGroup, inputGroupsCount, inputRecordsCount, ratio, shuffleBytesPerSource,
                        "Consider increasing parallelism.");
            }
        }
    }

    /**
     * Returns the value of the counter stored under {@code counterGroup}, or 0 when the
     * map has no entry for that group.
     */
    private static long counterValue(Map<String, TezCounter> counters, String counterGroup) {
        TezCounter counter = counters.get(counterGroup);
        return counter != null ? counter.getValue() : 0L;
    }

    /** Appends one finding to the CSV result, with values in the order of the CSV headers. */
    private void addRecord(TaskAttemptInfo attemptInfo, String counterGroup, long inputGroupsCount,
            long inputRecordsCount, float ratio, long shuffleBytesPerSource, String observation) {
        this.csvResult.addRecord(new String[]{
            attemptInfo.getTaskInfo().getVertexInfo().getVertexName(),
            attemptInfo.getTaskAttemptId(),
            counterGroup,
            attemptInfo.getNodeId(),
            String.valueOf(inputGroupsCount),
            String.valueOf(inputRecordsCount),
            String.valueOf(ratio),
            String.valueOf(shuffleBytesPerSource),
            String.valueOf(attemptInfo.getTimeTaken()),
            observation
        });
    }

    /** Returns the CSV result holding every finding recorded so far. */
    @Override
    public CSVResult getResult() throws TezException {
        return this.csvResult;
    }

    @Override
    public String getName() {
        return "Skew Analyzer";
    }

    @Override
    public String getDescription() {
        return "Analyzer reducer skews by mining reducer task counters";
    }

    /** Returns the configuration this analyzer was constructed with. */
    @Override
    public Configuration getConfiguration() {
        return this.config;
    }

    /**
     * Command-line entry point: runs the analyzer through {@link ToolRunner}, prints the
     * results and exits with the tool's return code.
     */
    public static void main(String[] args) throws Exception {
        Configuration config = new Configuration();
        SkewAnalyzer analyzer = new SkewAnalyzer(config);
        int res = ToolRunner.run(config, analyzer, args);
        analyzer.printResults();
        System.exit(res);
    }
}

