swift-mirror/lib/Driver/Compilation.cpp

//===--- Compilation.cpp - Compilation Task Data Structure ----------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#include "swift/Driver/Compilation.h"

#include "swift/AST/DiagnosticEngine.h"
#include "swift/AST/DiagnosticsDriver.h"
#include "swift/AST/ExperimentalDependencies.h"
#include "swift/Basic/OutputFileMap.h"
#include "swift/Basic/Program.h"
#include "swift/Basic/STLExtras.h"
#include "swift/Basic/Statistic.h"
#include "swift/Basic/TaskQueue.h"
#include "swift/Basic/Version.h"
#include "swift/Basic/type_traits.h"
#include "swift/Driver/Action.h"
#include "swift/Driver/DependencyGraph.h"
#include "swift/Driver/Driver.h"
#include "swift/Driver/DriverIncrementalRanges.h"
#include "swift/Driver/ExperimentalDependencyDriverGraph.h"
#include "swift/Driver/Job.h"
#include "swift/Driver/ParseableOutput.h"
#include "swift/Driver/ToolChain.h"
#include "swift/Option/Options.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"

#include "CompilationRecord.h"

#include <signal.h>

#define DEBUG_TYPE "batch-mode"

// Batch-mode has a sub-mode for testing that randomizes batch partitions,
// by user-provided seed. That is the only thing randomized here.
#include <random>

using namespace swift;
using namespace swift::sys;
using namespace swift::driver;
using namespace llvm::opt;

struct LogJob {
  const Job *j;
  LogJob(const Job *j) : j(j) {}
};

struct LogJobArray {
  const ArrayRef<const Job *> js;
  LogJobArray(const ArrayRef<const Job *> js) : js(js) {}
};

struct LogJobSet {
  const SmallPtrSetImpl<const Job*> &js;
  LogJobSet(const SmallPtrSetImpl<const Job*> &js) : js(js) {}
};

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const LogJob &lj) {
  lj.j->printSummary(os);
  return os;
}

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const LogJobArray &ljs) {
  os << "[";
  interleave(ljs.js,
             [&](Job const *j) { os << LogJob(j); },
             [&]() { os << ' '; });
  os << "]";
  return os;
}

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const LogJobSet &ljs) {
  os << "{";
  interleave(ljs.js,
             [&](Job const *j) { os << LogJob(j); },
             [&]() { os << ' '; });
  os << "}";
  return os;
}

// clang-format off
Compilation::Compilation(DiagnosticEngine &Diags,
                         const ToolChain &TC,
                         OutputInfo const &OI,
                         OutputLevel Level,
                         std::unique_ptr<InputArgList> InputArgs,
                         std::unique_ptr<DerivedArgList> TranslatedArgs,
                         InputFileList InputsWithTypes,
                         std::string CompilationRecordPath,
                         bool OutputCompilationRecordForModuleOnlyBuild,
                         StringRef ArgsHash,
                         llvm::sys::TimePoint<> StartTime,
                         llvm::sys::TimePoint<> LastBuildTime,
                         size_t FilelistThreshold,
                         bool EnableIncrementalBuild,
                         bool EnableBatchMode,
                         unsigned BatchSeed,
                         Optional<unsigned> BatchCount,
                         Optional<unsigned> BatchSizeLimit,
                         bool SaveTemps,
                         bool ShowDriverTimeCompilation,
                         std::unique_ptr<UnifiedStatsReporter> StatsReporter,
                         bool EnableExperimentalDependencies,
                         bool VerifyExperimentalDependencyGraphAfterEveryImport,
                         bool EmitExperimentalDependencyDotFileAfterEveryImport,
                         bool ExperimentalDependenciesIncludeIntrafileOnes,
                         bool EnableSourceRangeDependencies)
  : Diags(Diags), TheToolChain(TC),
    TheOutputInfo(OI),
    Level(Level),
    RawInputArgs(std::move(InputArgs)),
    TranslatedArgs(std::move(TranslatedArgs)),
    InputFilesWithTypes(std::move(InputsWithTypes)),
    CompilationRecordPath(CompilationRecordPath),
    ArgsHash(ArgsHash),
    BuildStartTime(StartTime),
    LastBuildTime(LastBuildTime),
    EnableIncrementalBuild(EnableIncrementalBuild),
    OutputCompilationRecordForModuleOnlyBuild(
        OutputCompilationRecordForModuleOnlyBuild),
    EnableBatchMode(EnableBatchMode),
    BatchSeed(BatchSeed),
    BatchCount(BatchCount),
    BatchSizeLimit(BatchSizeLimit),
    SaveTemps(SaveTemps),
    ShowDriverTimeCompilation(ShowDriverTimeCompilation),
    Stats(std::move(StatsReporter)),
    FilelistThreshold(FilelistThreshold),
    EnableExperimentalDependencies(EnableExperimentalDependencies),
    VerifyExperimentalDependencyGraphAfterEveryImport(
      VerifyExperimentalDependencyGraphAfterEveryImport),
    EmitExperimentalDependencyDotFileAfterEveryImport(
      EmitExperimentalDependencyDotFileAfterEveryImport),
    ExperimentalDependenciesIncludeIntrafileOnes(
      ExperimentalDependenciesIncludeIntrafileOnes),
    EnableSourceRangeDependencies(EnableSourceRangeDependencies) {

};
// clang-format on

static bool writeFilelistIfNecessary(const Job *job, const ArgList &args,
                                     DiagnosticEngine &diags);

using CommandSet = llvm::SmallPtrSet<const Job *, 16>;
using CommandSetVector = llvm::SetVector<const Job*>;
using BatchPartition = std::vector<std::vector<const Job*>>;

using InputInfoMap = llvm::SmallMapVector<const llvm::opt::Arg *,
                                          CompileJobAction::InputInfo, 16>;

namespace swift {
namespace driver {
  class PerformJobsState {

    /// The containing Compilation object.
    Compilation &Comp;

    /// All jobs which have been scheduled for execution (whether or not
    /// they've finished execution), or which have been determined that they
    /// don't need to run.
    CommandSet ScheduledCommands;

    /// A temporary buffer to hold commands that were scheduled but haven't been
    /// added to the Task Queue yet, because we might try batching them together
    /// first.
    CommandSetVector PendingExecution;

    /// Set of synthetic BatchJobs that serve to cluster subsets of jobs waiting
    /// in PendingExecution. Also used to identify (then unpack) BatchJobs back
    /// to their underlying non-Batch Jobs, when running a callback from
    /// TaskQueue.
    CommandSet BatchJobs;

    /// Persistent counter for allocating quasi-PIDs to Jobs combined into
    /// BatchJobs. Quasi-PIDs are _negative_ PID-like unique keys used to
    /// masquerade BatchJob constituents as (quasi)processes, when writing
    /// parseable output to consumers that don't understand the idea of a batch
    /// job. They are negative in order to avoid possibly colliding with real
    /// PIDs (which are always positive). We start at -1000 here as a crude but
    /// harmless hedge against colliding with an errno value that might slip
    /// into the stream of real PIDs (say, due to a TaskQueue bug).
    int64_t NextBatchQuasiPID = -1000;

    /// All jobs which have finished execution or which have been determined
    /// that they don't need to run.
    CommandSet FinishedCommands;

    /// A map from a Job to the commands it is known to be blocking.
    ///
    /// The blocked jobs should be scheduled as soon as possible.
    llvm::SmallDenseMap<const Job *, TinyPtrVector<const Job *>, 16>
        BlockingCommands;

    /// A map from commands that didn't get to run to whether or not they affect
    /// downstream commands.
    ///
    /// Only intended for source files.
    llvm::SmallDenseMap<const Job *, bool, 16> UnfinishedCommands;

    /// Jobs that incremental-mode has decided it can skip.
    CommandSet DeferredCommands;

    /// Jobs in the initial set with Condition::Always, and having an existing
    /// .swiftdeps files.
    /// Set by scheduleInitialJobsForIncrementalCompilation and used only by
    /// additionalJobsToScheduleForDependencyBasedIncrementalCompilation.
    SmallVector<const Job *, 16> InitialCascadingCommands;

  public:
    /// Dependency graph for deciding which jobs are dirty (need running)
    /// or clean (can be skipped).
    using DependencyGraph = DependencyGraph<const Job *>;
    DependencyGraph StandardDepGraph;

    /// Experimental Dependency graph for finer-grained dependencies
    Optional<experimental_dependencies::ModuleDepGraph> ExpDepGraph;

  private:
    /// Helper for tracing the propagation of marks in the graph.
    DependencyGraph::MarkTracer ActualIncrementalTracer;
    DependencyGraph::MarkTracer *IncrementalTracer = nullptr;

    /// TaskQueue for execution.
    std::unique_ptr<TaskQueue> TQ;

    /// Cumulative result of PerformJobs(), accumulated from subprocesses.
    int Result = EXIT_SUCCESS;

    /// True if any Job crashed.
    bool AnyAbnormalExit = false;

    /// Timers for monitoring execution time of subprocesses.
    llvm::TimerGroup DriverTimerGroup {"driver", "Driver Compilation Time"};
    llvm::SmallDenseMap<const Job *, std::unique_ptr<llvm::Timer>, 16>
    DriverTimers;

    void noteBuilding(const Job *cmd, const bool forRanges, StringRef reason) {
      if (!Comp.getShowIncrementalBuildDecisions())
        return;
      if (ScheduledCommands.count(cmd))
        return;
      llvm::outs() << (forRanges ? "Ranges" : "Dependencies") << " queuing "
                   << reason << ": " << LogJob(cmd) << "\n";

      if (Comp.getEnableExperimentalDependencies())
        ExpDepGraph.getValue().printPath(llvm::outs(), cmd);
      else
        IncrementalTracer->printPath(
                                     llvm::outs(), cmd, [](raw_ostream &out, const Job *base) {
                                       out << llvm::sys::path::filename(base->getOutput().getBaseInput(0));
                                     });
    }

    const Job *findUnfinishedJob(ArrayRef<const Job *> JL) {
      for (const Job *Cmd : JL) {
        if (!FinishedCommands.count(Cmd))
          return Cmd;
      }
      return nullptr;
    }

    /// Schedule the given Job if it has not been scheduled and if all of
    /// its inputs are in FinishedCommands.
    void scheduleCommandIfNecessaryAndPossible(const Job *Cmd) {
      if (ScheduledCommands.count(Cmd)) {
        if (Comp.getShowJobLifecycle()) {
          llvm::outs() << "Already scheduled: " << LogJob(Cmd) << "\n";
        }
        return;
      }

      if (auto Blocking = findUnfinishedJob(Cmd->getInputs())) {
        BlockingCommands[Blocking].push_back(Cmd);
        if (Comp.getShowJobLifecycle()) {
          llvm::outs() << "Blocked by: " << LogJob(Blocking)
                       << ", now blocking jobs: "
                       << LogJobArray(BlockingCommands[Blocking]) << "\n";
        }
        return;
      }

      // Adding to scheduled means we've committed to its completion (not
      // distinguished from skipping). We never remove it once inserted.
      ScheduledCommands.insert(Cmd);

      // Adding to pending means it should be in the next round of additions to
      // the task queue (either batched or singularly); we remove Jobs from
      // PendingExecution once we hand them over to the TaskQueue.
      PendingExecution.insert(Cmd);
    }

    void addPendingJobToTaskQueue(const Job *Cmd) {
      // FIXME: Failing here should not take down the whole process.
      bool success =
          writeFilelistIfNecessary(Cmd, Comp.getArgs(), Comp.getDiags());
      assert(success && "failed to write filelist");
      (void)success;

      assert(Cmd->getExtraEnvironment().empty() &&
             "not implemented for compilations with multiple jobs");
      if (Comp.getShowJobLifecycle())
        llvm::outs() << "Added to TaskQueue: " << LogJob(Cmd) << "\n";
      TQ->addTask(Cmd->getExecutable(), Cmd->getArgumentsForTaskExecution(),
                  llvm::None, (void *)Cmd);
    }

    /// When a task finishes, check other Jobs that may be blocked.
    void markFinished(const Job *Cmd, bool Skipped=false) {
      if (Comp.getShowJobLifecycle()) {
        llvm::outs() << "Job "
                     << (Skipped ? "skipped" : "finished")
                     << ": " << LogJob(Cmd) << "\n";
      }
      FinishedCommands.insert(Cmd);
      if (auto *Stats = Comp.getStatsReporter()) {
          auto &D = Stats->getDriverCounters();
          if (Skipped)
            D.NumDriverJobsSkipped++;
          else
            D.NumDriverJobsRun++;
      }
      auto BlockedIter = BlockingCommands.find(Cmd);
      if (BlockedIter != BlockingCommands.end()) {
        auto AllBlocked = std::move(BlockedIter->second);
        if (Comp.getShowJobLifecycle()) {
          llvm::outs() << "Scheduling maybe-unblocked jobs: "
                       << LogJobArray(AllBlocked) << "\n";
        }
        BlockingCommands.erase(BlockedIter);
        for (auto *Blocked : AllBlocked)
          scheduleCommandIfNecessaryAndPossible(Blocked);
      }
    }

    bool isBatchJob(const Job *MaybeBatchJob) const {
      return BatchJobs.count(MaybeBatchJob) != 0;
    }

    /// Callback which will be called immediately after a task has started. This
    /// callback may be used to provide output indicating that the task began.
    void taskBegan(ProcessId Pid, void *Context) {
      // TODO: properly handle task began.
      const Job *BeganCmd = (const Job *)Context;

      if (Comp.getShowDriverTimeCompilation()) {
        llvm::SmallString<128> TimerName;
        llvm::raw_svector_ostream OS(TimerName);
        OS << LogJob(BeganCmd);
        DriverTimers.insert({
            BeganCmd,
              std::unique_ptr<llvm::Timer>(
                new llvm::Timer("task", OS.str(), DriverTimerGroup))
              });
        DriverTimers[BeganCmd]->startTimer();
      }

      switch (Comp.getOutputLevel()) {
      case OutputLevel::Normal:
        break;
        // For command line or verbose output, print out each command as it
        // begins execution.
      case OutputLevel::PrintJobs:
        BeganCmd->printCommandLineAndEnvironment(llvm::outs());
        break;
      case OutputLevel::Verbose:
        BeganCmd->printCommandLine(llvm::errs());
        break;
      case OutputLevel::Parseable:
        BeganCmd->forEachContainedJobAndPID(Pid, [&](const Job *J, Job::PID P) {
          parseable_output::emitBeganMessage(llvm::errs(), *J, P,
                                             TaskProcessInformation(Pid));
        });
        break;
      }
    }

    /// Note that a .swiftdeps file failed to load and take corrective actions:
    /// disable incremental logic and schedule all existing deferred commands.
    void
    dependencyLoadFailed(StringRef DependenciesFile, bool Warn=true) {
      if (Warn && Comp.getShowIncrementalBuildDecisions())
        Comp.getDiags().diagnose(SourceLoc(),
                                 diag::warn_unable_to_load_dependencies,
                                 DependenciesFile);
      Comp.disableIncrementalBuild();
      for (const Job *Cmd : DeferredCommands)
        scheduleCommandIfNecessaryAndPossible(Cmd);
      DeferredCommands.clear();
    }

    /// Helper that attempts to reload a job's .swiftdeps file after the job
    /// exits, and re-run transitive marking to ensure everything is properly
    /// invalidated by any new dependency edges introduced by it. If reloading
    /// fails, this can cause deferred jobs to be immediately scheduled.

    template <unsigned N>
    void reloadAndRemarkDeps(const Job *FinishedCmd, int ReturnCode,
                             SmallVector<const Job *, N> &Dependents) {
      if (Comp.getEnableExperimentalDependencies())
        reloadAndRemarkDeps(FinishedCmd, ReturnCode, Dependents,
                            ExpDepGraph.getValue());
      else
        reloadAndRemarkDeps(FinishedCmd, ReturnCode, Dependents,
                            StandardDepGraph);
    }

    template <unsigned N, typename DependencyGraphT>
    void reloadAndRemarkDeps(const Job *FinishedCmd,
                             int ReturnCode,
                             SmallVector<const Job *, N> &Dependents,
                             DependencyGraphT &DepGraph) {

      const CommandOutput &Output = FinishedCmd->getOutput();
      StringRef DependenciesFile =
          Output.getAdditionalOutputForType(file_types::TY_SwiftDeps);

      if (DependenciesFile.empty()) {
        // If this job doesn't track dependencies, it must always be run.
        // Note: In theory CheckDependencies makes sense as well (for a leaf
        // node in the dependency graph), and maybe even NewlyAdded (for very
        // coarse dependencies that always affect downstream nodes), but we're
        // not using either of those right now, and this logic should probably
        // be revisited when we are.
        assert(FinishedCmd->getCondition() == Job::Condition::Always);
      } else {
        // If we have a dependency file /and/ the frontend task exited normally,
        // we can be discerning about what downstream files to rebuild.
        if (ReturnCode == EXIT_SUCCESS || ReturnCode == EXIT_FAILURE) {
          // "Marked" means that everything provided by this node (i.e. Job) is
          // dirty. Thus any file using any of these provides must be
          // recompiled. (Only non-private entities are output as provides.) In
          // other words, this Job "cascades"; the need to recompile it causes
          // other recompilations. It is possible that the current code marks
          // things that do not need to be marked. Unecessary compilation would
          // result if that were the case.
          bool wasCascading = DepGraph.isMarked(FinishedCmd);

          switch (DepGraph.loadFromPath(FinishedCmd, DependenciesFile,
                                        Comp.getDiags())) {
          case DependencyGraphImpl::LoadResult::HadError:
            if (ReturnCode == EXIT_SUCCESS) {
              dependencyLoadFailed(DependenciesFile);
              Dependents.clear();
            } // else, let the next build handle it.
            break;
          case DependencyGraphImpl::LoadResult::UpToDate:
            if (!wasCascading)
              break;
            LLVM_FALLTHROUGH;
          case DependencyGraphImpl::LoadResult::AffectsDownstream:
            DepGraph.markTransitive(Dependents, FinishedCmd,
                                    IncrementalTracer);
            break;
          }
        } else {
          // If there's an abnormal exit (a crash), assume the worst.
          switch (FinishedCmd->getCondition()) {
          case Job::Condition::NewlyAdded:
            // The job won't be treated as newly added next time. Conservatively
            // mark it as affecting other jobs, because some of them may have
            // completed already.
            DepGraph.markTransitive(Dependents, FinishedCmd,
                                    IncrementalTracer);
            break;
          case Job::Condition::Always:
            // Any incremental task that shows up here has already been marked;
            // we didn't need to wait for it to finish to start downstream
            // tasks.
            assert(DepGraph.isMarked(FinishedCmd));
            break;
          case Job::Condition::RunWithoutCascading:
            // If this file changed, it might have been a non-cascading change
            // and it might not. Unfortunately, the interface hash has been
            // updated or compromised, so we don't actually know anymore; we
            // have to conservatively assume the changes could affect other
            // files.
            DepGraph.markTransitive(Dependents, FinishedCmd,
                                    IncrementalTracer);
            break;
          case Job::Condition::CheckDependencies:
            // If the only reason we're running this is because something else
            // changed, then we can trust the dependency graph as to whether
            // it's a cascading or non-cascading change. That is, if whatever
            // /caused/ the error isn't supposed to affect other files, and
            // whatever /fixes/ the error isn't supposed to affect other files,
            // then there's no need to recompile any other inputs. If either of
            // those are false, we /do/ need to recompile other inputs.
            break;
          }
        }
      }
    }

    /// Check to see if a job produced a zero-length serialized diagnostics
    /// file, which is used to indicate batch-constituents that were batched
    /// together with a failing constituent but did not, themselves, produce any
    /// errors.
    bool jobWasBatchedWithFailingJobs(const Job *J) const {
      auto DiaPath =
        J->getOutput().getAnyOutputForType(file_types::TY_SerializedDiagnostics);
      if (DiaPath.empty())
        return false;
      if (!llvm::sys::fs::is_regular_file(DiaPath))
        return false;
      uint64_t Size;
      auto EC = llvm::sys::fs::file_size(DiaPath, Size);
      if (EC)
        return false;
      return Size == 0;
    }

    /// If a batch-constituent job happens to be batched together with a job
    /// that exits with an error, the batch-constituent may be considered
    /// "cancelled".
    bool jobIsCancelledBatchConstituent(int ReturnCode,
                                        const Job *ContainerJob,
                                        const Job *ConstituentJob) {
      return ReturnCode != 0 &&
        isBatchJob(ContainerJob) &&
        jobWasBatchedWithFailingJobs(ConstituentJob);
    }

    /// Unpack a \c BatchJob that has finished into its constituent \c Job
    /// members, and call \c taskFinished on each, propagating any \c
    /// TaskFinishedResponse other than \c
    /// TaskFinishedResponse::ContinueExecution from any of the constituent
    /// calls.
    TaskFinishedResponse
    unpackAndFinishBatch(int ReturnCode, StringRef Output,
                         StringRef Errors, const BatchJob *B) {
      if (Comp.getShowJobLifecycle())
        llvm::outs() << "Batch job finished: " << LogJob(B) << "\n";
      auto res = TaskFinishedResponse::ContinueExecution;
      for (const Job *J : B->getCombinedJobs()) {
        if (Comp.getShowJobLifecycle())
          llvm::outs() << "  ==> Unpacked batch constituent finished: "
                       << LogJob(J) << "\n";
        auto r = taskFinished(
            llvm::sys::ProcessInfo::InvalidPid, ReturnCode, Output, Errors,
            TaskProcessInformation(llvm::sys::ProcessInfo::InvalidPid),
            (void *)J);
        if (r != TaskFinishedResponse::ContinueExecution)
          res = r;
      }
      return res;
    }

    void
    emitParseableOutputForEachFinishedJob(ProcessId Pid, int ReturnCode,
                                          StringRef Output,
                                          const Job *FinishedCmd,
                                          TaskProcessInformation ProcInfo) {
      FinishedCmd->forEachContainedJobAndPID(Pid, [&](const Job *J,
                                                      Job::PID P) {
        if (jobIsCancelledBatchConstituent(ReturnCode, FinishedCmd, J)) {
          // Simulate SIGINT-interruption to parseable-output consumer for any
          // constituent of a failing batch job that produced no errors of its
          // own.
          parseable_output::emitSignalledMessage(llvm::errs(), *J, P,
                                                 "cancelled batch constituent",
                                                 "", SIGINT, ProcInfo);
        } else {
          parseable_output::emitFinishedMessage(llvm::errs(), *J, P, ReturnCode,
                                                Output, ProcInfo);
        }
      });
    }

    /// Callback which will be called immediately after a task has finished
    /// execution. Determines if execution should continue, and also schedule
    /// any additional Jobs which we now know we need to run.
    TaskFinishedResponse taskFinished(ProcessId Pid, int ReturnCode,
                                      StringRef Output, StringRef Errors,
                                      TaskProcessInformation ProcInfo,
                                      void *Context) {
      const Job *const FinishedCmd = (const Job *)Context;

      if (Pid != llvm::sys::ProcessInfo::InvalidPid) {

        if (Comp.getShowDriverTimeCompilation()) {
          DriverTimers[FinishedCmd]->stopTimer();
        }
        processOutputOfFinishedProcess(Pid, ReturnCode, FinishedCmd, Output,
                                       ProcInfo);
      }

      if (Comp.getStatsReporter() && ProcInfo.getResourceUsage().hasValue())
        Comp.getStatsReporter()->recordJobMaxRSS(
            ProcInfo.getResourceUsage()->Maxrss);

      if (isBatchJob(FinishedCmd)) {
        return unpackAndFinishBatch(ReturnCode, Output, Errors,
                                    static_cast<const BatchJob *>(FinishedCmd));
      }

      SmallVector<const Job *, 16> Dependents;
      if (!Comp.getUseSourceRangeDependencies())
        Dependents = subsequentJobsNeededForDeps(FinishedCmd, ReturnCode);
      else {
        SmallVector<const Job *, 16> DependentsForDeps;
        std::tie(Dependents, DependentsForDeps) =
            subsequentJobsNeededForRanges(FinishedCmd, ReturnCode);
        // Will reload again, sigh
        if (Comp.getShowIncrementalBuildDecisions() &&
            (!DependentsForDeps.empty() || !Dependents.empty())) {
          llvm::outs() << "After completion of " << LogJob(FinishedCmd) << ": ";
          for (auto const *Cmd : DependentsForDeps)
            llvm::outs() << "Dependencies would now schedule: " << LogJob(Cmd)
                         << "\n";
          for (auto const *Cmd : Dependents)
            llvm::outs() << "Source ranges will now schedule: " << LogJob(Cmd)
                         << "\n";
          llvm::outs() << "For an additional " << DependentsForDeps.size()
                       << " (deps) vs " << Dependents.size() << " (ranges)\n";
        }
      }

      if (ReturnCode != EXIT_SUCCESS)
        return taskFailed(FinishedCmd, ReturnCode);

      // When a task finishes, we need to reevaluate the other commands that
      // might have been blocked.
      markFinished(FinishedCmd);

      for (const Job *Cmd : Dependents) {
        DeferredCommands.erase(Cmd);
        noteBuilding(Cmd, Comp.getUseSourceRangeDependencies(),
                     "because of dependencies discovered later");
        scheduleCommandIfNecessaryAndPossible(Cmd);
      }

      return TaskFinishedResponse::ContinueExecution;
    }

    TaskFinishedResponse taskFailed(const Job *FinishedCmd,
                                    const int ReturnCode) {
      // The task failed, so return true without performing any further
      // dependency analysis.

      // Store this task's ReturnCode as our Result if we haven't stored
      // anything yet.
      if (Result == EXIT_SUCCESS)
        Result = ReturnCode;

      if (!isa<CompileJobAction>(FinishedCmd->getSource()) ||
          ReturnCode != EXIT_FAILURE) {
        Comp.getDiags().diagnose(SourceLoc(), diag::error_command_failed,
                                 FinishedCmd->getSource().getClassName(),
                                 ReturnCode);
      }

      // See how ContinueBuildingAfterErrors gets set up in Driver.cpp for
      // more info.
      assert((Comp.getContinueBuildingAfterErrors() ||
              !Comp.getBatchModeEnabled()) &&
             "batch mode diagnostics require ContinueBuildingAfterErrors");

      return Comp.getContinueBuildingAfterErrors()
                 ? TaskFinishedResponse::ContinueExecution
                 : TaskFinishedResponse::StopExecution;
    }

    void processOutputOfFinishedProcess(ProcessId Pid, int ReturnCode,
                                        const Job *const FinishedCmd,
                                        StringRef Output,
                                        TaskProcessInformation ProcInfo) {
      switch (Comp.getOutputLevel()) {
      case OutputLevel::PrintJobs:
        // Only print the jobs, not the outputs
        break;
      case OutputLevel::Normal:
      case OutputLevel::Verbose:
        // Send the buffered output to stderr, though only if we
        // support getting buffered output.
        if (TaskQueue::supportsBufferingOutput())
          llvm::errs() << Output;
        break;
      case OutputLevel::Parseable:
        emitParseableOutputForEachFinishedJob(Pid, ReturnCode, Output,
                                              FinishedCmd, ProcInfo);
        break;
      }
    }

    /// In order to handle both old dependencies that have disappeared and new
    /// dependencies that have arisen, we need to reload the dependency file.
    /// Do this whether or not the build succeeded.
    ///
    /// Also need it when using source ranges to see if a new top-level
    /// name were introduced, for instance by changing a
    /// previously unparsed line to "} func foo() {".
    ///
    /// FIXME: too much global state floating around, e.g.
    /// getIncrementalBuildEnabled
    SmallVector<const Job *, 16>
    subsequentJobsNeededForDeps(const Job *FinishedCmd, const int ReturnCode) {
      if (!Comp.getIncrementalBuildEnabled())
        return {};
      SmallVector<const Job *, 16> Dependents;
      reloadAndRemarkDeps(FinishedCmd, ReturnCode, Dependents);
      return Dependents;
    }

    std::pair<SmallVector<const Job *, 16>, SmallVector<const Job *, 16>>
    subsequentJobsNeededForRanges(const Job *FinishedCmd,
                                  const int ReturnCode) {

      if (!Comp.getIncrementalBuildEnabled())
        return {};
      // FIXME: crude, could just use dependencies to schedule only those jobs
      // depending on the added tops
      assert(Comp.getEnableSourceRangeDependencies() &&
             "only implementing this for those");
      const size_t topsBefore = countTopLevelProvides(FinishedCmd);

      SmallVector<const Job *, 16> Dependents;
      reloadAndRemarkDeps(FinishedCmd, ReturnCode, Dependents);

      const size_t topsAfter = countTopLevelProvides(FinishedCmd);

      if (topsAfter <= topsBefore)
        return {{}, Dependents};

      // TODO: instead of scheduling *all* the jobs, could figure out
      // which ones use the introduced top-level names and only schedule those.
      // However, as it stands, the caller will fall back to the old
      // dependency scheme in this case anyway.
      return {Dependents, Dependents};
    }

    size_t countTopLevelProvides(const Job *Cmd) {
      return Comp.getEnableExperimentalDependencies()
                 ? ExpDepGraph.getValue().countTopLevelProvides(Cmd)
                 : StandardDepGraph.countTopLevelProvides(Cmd);
    }

    TaskFinishedResponse taskSignalled(ProcessId Pid, StringRef ErrorMsg,
                                       StringRef Output, StringRef Errors,
                                       void *Context, Optional<int> Signal,
                                       TaskProcessInformation ProcInfo) {
      const Job *SignalledCmd = (const Job *)Context;

      if (Comp.getShowDriverTimeCompilation()) {
        DriverTimers[SignalledCmd]->stopTimer();
      }

      if (Comp.getOutputLevel() == OutputLevel::Parseable) {
        // Parseable output was requested.
        SignalledCmd->forEachContainedJobAndPID(Pid, [&](const Job *J,
                                                         Job::PID P) {
          parseable_output::emitSignalledMessage(llvm::errs(), *J, P, ErrorMsg,
                                                 Output, Signal, ProcInfo);
        });
      } else {
        // Otherwise, send the buffered output to stderr, though only if we
        // support getting buffered output.
        if (TaskQueue::supportsBufferingOutput())
          llvm::errs() << Output;
      }

      if (Comp.getStatsReporter() && ProcInfo.getResourceUsage().hasValue())
        Comp.getStatsReporter()->recordJobMaxRSS(
            ProcInfo.getResourceUsage()->Maxrss);

      if (!ErrorMsg.empty())
        Comp.getDiags().diagnose(SourceLoc(),
                                 diag::error_unable_to_execute_command,
                                 ErrorMsg);

      if (Signal.hasValue()) {
        Comp.getDiags().diagnose(SourceLoc(), diag::error_command_signalled,
                                 SignalledCmd->getSource().getClassName(),
                                 Signal.getValue());
      } else {
        Comp.getDiags()
            .diagnose(SourceLoc(),
                      diag::error_command_signalled_without_signal_number,
                      SignalledCmd->getSource().getClassName());
      }

      // Since the task signalled, unconditionally set result to -2.
      Result = -2;
      AnyAbnormalExit = true;

      return TaskFinishedResponse::StopExecution;
    }

  public:
    PerformJobsState(Compilation &Comp, std::unique_ptr<TaskQueue> &&TaskQueue)
      : Comp(Comp), ActualIncrementalTracer(Comp.getStatsReporter()),
        TQ(std::move(TaskQueue)) {
      if (Comp.getEnableExperimentalDependencies())
        ExpDepGraph.emplace(
            Comp.getVerifyExperimentalDependencyGraphAfterEveryImport(),
            Comp.getEmitExperimentalDependencyDotFileAfterEveryImport(),
            Comp.getTraceDependencies(),
            Comp.getStatsReporter()
            );
      else if (Comp.getTraceDependencies())
        IncrementalTracer = &ActualIncrementalTracer;
    }

    /// Schedule and run initial, additional, and batch jobs.
    template <typename DependencyGraphT>
    void runJobs(DependencyGraphT &DepGraph) {
      scheduleJobsBeforeBatching(DepGraph);
      formBatchJobsAndAddPendingJobsToTaskQueue();
      runTaskQueueToCompletion();
      checkUnfinishedJobs(DepGraph);
    }

  private:
    template <typename DependencyGraphT>
    void scheduleJobsBeforeBatching(DependencyGraphT &DepGraph) {
      if (Comp.getIncrementalBuildEnabled())
        scheduleFirstRoundJobsForIncrementalCompilation(DepGraph);
      else
        scheduleJobsForNonIncrementalCompilation();
    }

    void scheduleJobsForNonIncrementalCompilation() {
      for (const Job *Cmd : Comp.getJobs())
        scheduleCommandIfNecessaryAndPossible(Cmd);
    }

    template <typename DependencyGraphT>
    void scheduleFirstRoundJobsForIncrementalCompilation(
        DependencyGraphT &DepGraph) {
      auto compileJobsToScheduleViaDependencies =
          computeDependenciesAndGetNeededCompileJobs(DepGraph);
      auto compileJobsToSchedule = compileJobsToScheduleViaDependencies;

      if (Comp.getEnableSourceRangeDependencies()) {
        auto compileJobsToScheduleViaSourceRanges =
            computeRangesAndGetNeededCompileJobs(DepGraph);
        // <= because of fewer casading jobs
        const size_t inputCount = Comp.getInputFiles().size();
        const bool useSourceRangeDependencies =
            compileJobsToScheduleViaSourceRanges.size() < inputCount;
        Comp.setUseSourceRangeDependencies(useSourceRangeDependencies);
        if (useSourceRangeDependencies)
          compileJobsToSchedule = compileJobsToScheduleViaSourceRanges;

        if (Comp.getShowIncrementalBuildDecisions()) {
          llvm::outs() << "Dependencies would run "
                       << compileJobsToScheduleViaDependencies.size()
                       << " compilations initially, source-ranges would run "
                       << compileJobsToScheduleViaSourceRanges.size()
                       << " compilations almost always; selecting "
                       << (Comp.getUseSourceRangeDependencies()
                               ? "source ranges"
                               : "dependencies")
                       << "\n";
        }
      }

      for (const Job *Cmd : Comp.getJobs()) {
        if (Cmd->getFirstSwiftPrimaryInput().empty() ||
            compileJobsToSchedule.count(Cmd))
          scheduleCommandIfNecessaryAndPossible(Cmd);
        else
          DeferredCommands.insert(Cmd);
      }
    }

    /// Return hadError
    template <typename DependencyGraphT>
    llvm::SmallPtrSet<const Job *, 16>
    computeRangesAndGetNeededCompileJobs(DependencyGraphT &DepGraph) {
      using namespace incremental_ranges;

      const bool dumpSwiftRanges =
          Comp.getArgs().hasArg(options::OPT_driver_dump_swift_ranges);
      const bool dumpCompiledSourceDiffs =
          Comp.getArgs().hasArg(options::OPT_driver_dump_compiled_source_diffs);

      const auto jobs = Comp.getJobsSimply();
      const auto allSourceRangeInfo =
          incremental_ranges::SourceRangeBasedInfo::loadAllInfo(
              jobs, Comp.getDiags(), Comp.getShowIncrementalBuildDecisions());

      incremental_ranges::SourceRangeBasedInfo::dumpAllInfo(
          allSourceRangeInfo, dumpCompiledSourceDiffs, dumpSwiftRanges);

      // As is, returns true and caller will fall back to dependencies.
      // But, since we register errors by recording massive changes to
      // primaries, could just keep on.
      // load dependencies for external dependencies and interfacehashes
      auto jobsToCompile = SourceRangeBasedInfo::
          neededCompileJobsForRangeBasedIncrementalCompilation(
              allSourceRangeInfo, Comp.getJobsSimply(),
              [&](const Job *Cmd) {
                scheduleCommandIfNecessaryAndPossible(Cmd);
              },
              [&](const Job *Cmd) { DeferredCommands.insert(Cmd); },
              [&](const Job *Cmd, Twine why) {
                noteBuilding(Cmd, true, why.str());
              });

      for (const Job *Cmd :
           externallyDependentJobsForRangeBasedIncrementalCompilation(DepGraph))
        jobsToCompile.insert(Cmd);
      return jobsToCompile;
    }

    template <typename DependencyGraphT>
    llvm::SmallPtrSet<const Job *, 16>
    computeDependenciesAndGetNeededCompileJobs(DependencyGraphT &DepGraph) {
      llvm::SmallPtrSet<const Job *, 16> jobsToSchedule;
      for (const Job *Cmd : Comp.getJobs()) {
        if (Cmd->getFirstSwiftPrimaryInput().empty())
          continue; // not Compile
        const bool shouldSched =
            isCompileJobInitiallyNeededForDependencyBasedIncrementalCompilation(
                Cmd, DepGraph);
        if (shouldSched)
          jobsToSchedule.insert(Cmd);
      }
      {
        const auto additionalJobs =
            additionalJobsToScheduleForDependencyBasedIncrementalCompilation(
                DepGraph);
        for (const auto *Cmd : additionalJobs)
          jobsToSchedule.insert(Cmd);
      }
      return jobsToSchedule;
    }

    /// Schedule all jobs we can from the initial list provided by Compilation.
    template <typename DependencyGraphT>
    bool isCompileJobInitiallyNeededForDependencyBasedIncrementalCompilation(
        const Job *Cmd, DependencyGraphT &DepGraph) {

      Job::Condition Cond;
      bool HasDependenciesFile;
      std::tie(Cond, HasDependenciesFile) =
          loadDependenciesAndComputeCondition(Cmd, DepGraph);

      const bool shouldSched = shouldScheduleCompileJobAccordingToCondition(
          Cmd, Cond, HasDependenciesFile, DepGraph);
      if (ExpDepGraph.hasValue())
        assert(ExpDepGraph.getValue().emitDotFileAndVerify(Comp.getDiags()));
      return shouldSched;
    }

    template <typename DependencyGraphT>
    std::pair<Job::Condition, bool>
    loadDependenciesAndComputeCondition(const Job *const Cmd,
                                        DependencyGraphT &DepGraph) {
      // Try to load the dependencies file for this job. If there isn't one, we
      // always have to run the job, but it doesn't affect any other jobs. If
      // there should be one but it's not present or can't be loaded, we have to
      // run all the jobs.
      // FIXME: We can probably do better here!
      if (Cmd->getCondition() == Job::Condition::NewlyAdded) {
        DepGraph.addIndependentNode(Cmd);
        return {Job::Condition::NewlyAdded, false};
      }

      const StringRef DependenciesFile =
          Cmd->getOutput().getAdditionalOutputForType(file_types::TY_SwiftDeps);
      if (DependenciesFile.empty())
        return {Job::Condition::Always, false};

      const auto loadResult =
          DepGraph.loadFromPath(Cmd, DependenciesFile, Comp.getDiags());
      switch (loadResult) {
      case DependencyGraphImpl::LoadResult::HadError:
        dependencyLoadFailed(DependenciesFile, /*Warn=*/false);
        return {Job::Condition::Always, true};
      case DependencyGraphImpl::LoadResult::UpToDate:
        return {Cmd->getCondition(), true};
      case DependencyGraphImpl::LoadResult::AffectsDownstream:
        if (Comp.getEnableExperimentalDependencies()) {
          // The experimental graph reports a change, since it lumps new
          // files together with new "Provides".
          return {Cmd->getCondition(), true};
        }
        llvm_unreachable("we haven't marked anything in this graph yet");
      }
    }

    template <typename DependencyGraphT>
    bool shouldScheduleCompileJobAccordingToCondition(
        const Job *const Cmd, const Job::Condition Condition,
        const bool hasDependenciesFile, DependencyGraphT &DepGraph) {
      switch (Condition) {
      case Job::Condition::Always:
      case Job::Condition::NewlyAdded:
        if (Comp.getIncrementalBuildEnabled() && hasDependenciesFile) {
          // Ensure dependents will get recompiled.
          InitialCascadingCommands.push_back(Cmd);
          // Mark this job as cascading.
          //
          // It would probably be safe and simpler to markTransitive on the
          // start nodes in the "Always" condition from the start instead of
          // using markIntransitive and having later functions call
          // markTransitive. That way markIntransitive would be an
          // implementation detail of DependencyGraph.
          DepGraph.markIntransitive(Cmd);
        }
        LLVM_FALLTHROUGH;
      case Job::Condition::RunWithoutCascading:
        noteBuilding(Cmd, false, "(initial)");
        return true;
      case Job::Condition::CheckDependencies:
        return false;
      }
    }

    /// Schedule transitive closure of initial jobs, and external jobs.
    template <typename DependencyGraphT>
    SmallVector<const Job *, 16>
    additionalJobsToScheduleForDependencyBasedIncrementalCompilation(
        DependencyGraphT &DepGraph) {
      SmallVector<const Job *, 16> AdditionalOutOfDateCommands =
          collectSecondaryJobsFromDependencyGraph(DepGraph);

      size_t firstSize = AdditionalOutOfDateCommands.size();

      // Check all cross-module dependencies as well.
      forEachOutOfDateExternalDependency(DepGraph, [&](StringRef dependency) {
        // If the dependency has been modified since the oldest built file,
        // or if we can't stat it for some reason (perhaps it's been
        // deleted?), trigger rebuilds through the dependency graph.
        DepGraph.markExternal(AdditionalOutOfDateCommands, dependency);
      });

      for (auto *externalCmd :
           llvm::makeArrayRef(AdditionalOutOfDateCommands).slice(firstSize)) {
        noteBuilding(externalCmd, false, "because of external dependencies");
      }
      return AdditionalOutOfDateCommands;
    }

    template <typename DependencyGraphT>
    SmallVector<const Job *, 16>
    externallyDependentJobsForRangeBasedIncrementalCompilation(
        DependencyGraphT &DepGraph) {
      SmallVector<const Job *, 16> results;
      forEachOutOfDateExternalDependency(
          DepGraph, [&](StringRef externalSwiftDeps) {
            DepGraph.forEachUnmarkedJobDirectlyDependentOnExternalSwiftdeps(
                externalSwiftDeps, [&](const void *node) {
                  // Sadly, the non-experimental dependency graph is type-unsafe
                  const Job *externalCmd = reinterpret_cast<const Job *>(node);
                  noteBuilding(externalCmd, true,
                               "because of external dependencies");
                  results.push_back(externalCmd);
                });
          });
      return results;
    }

    template <typename DependencyGraphT>
    void forEachOutOfDateExternalDependency(
        DependencyGraphT &DepGraph,
        function_ref<void(StringRef)> consumeExternalSwiftDeps) {
      for (StringRef dependency : DepGraph.getExternalDependencies()) {
        // If the dependency has been modified since the oldest built file,
        // or if we can't stat it for some reason (perhaps it's been
        // deleted?), trigger rebuilds through the dependency graph.
        llvm::sys::fs::file_status depStatus;
        if (llvm::sys::fs::status(dependency, depStatus) ||
            Comp.getLastBuildTime() < depStatus.getLastModificationTime())
          consumeExternalSwiftDeps(dependency);
      }
    }

    template <typename DependencyGraphT>
    SmallVector<const Job *, 16>
    collectSecondaryJobsFromDependencyGraph(DependencyGraphT &DepGraph) {
      SmallVector<const Job *, 16> AdditionalOutOfDateCommands;
      // We scheduled all of the files that have actually changed. Now add the
      // files that haven't changed, so that they'll get built in parallel if
      // possible and after the first set of files if it's not.
      for (auto *Cmd : InitialCascadingCommands) {
        DepGraph.markTransitive(AdditionalOutOfDateCommands, Cmd,
                                IncrementalTracer);
      }
      for (auto *transitiveCmd : AdditionalOutOfDateCommands)
        noteBuilding(transitiveCmd, false, "because of the initial set");

      return AdditionalOutOfDateCommands;
    }

    /// Insert all jobs in \p Cmds (of descriptive name \p Kind) to the \c
    /// TaskQueue, and clear \p Cmds.
    template <typename Container>
    void transferJobsToTaskQueue(Container &Cmds, StringRef Kind) {
      for (const Job *Cmd : Cmds) {
        if (Comp.getShowJobLifecycle())
          llvm::outs() << "Adding " << Kind
                       << " job to task queue: "
                       << LogJob(Cmd) << "\n";
        addPendingJobToTaskQueue(Cmd);
      }
      Cmds.clear();
    }

    /// Partition the jobs in \c PendingExecution into those that are \p
    /// Batchable and those that are \p NonBatchable, clearing \p
    /// PendingExecution.
    void getPendingBatchableJobs(CommandSetVector &Batchable,
                                 CommandSetVector &NonBatchable) {
      for (const Job *Cmd : PendingExecution) {
        if (Comp.getToolChain().jobIsBatchable(Comp, Cmd)) {
          if (Comp.getShowJobLifecycle())
            llvm::outs() << "Batchable: " << LogJob(Cmd) << "\n";
          Batchable.insert(Cmd);
        } else {
          if (Comp.getShowJobLifecycle())
            llvm::outs() << "Not batchable: " << LogJob(Cmd) << "\n";
          NonBatchable.insert(Cmd);
        }
      }
    }

    /// If \p Batch is nonempty, construct a new \c BatchJob from its
    /// contents by calling \p ToolChain::constructBatchJob, then insert the
    /// new \c BatchJob into \p Batches.
    void
    formBatchJobFromPartitionBatch(std::vector<const Job *> &Batches,
                                   std::vector<const Job *> const &Batch) {
      if (Batch.empty())
        return;
      if (Comp.getShowJobLifecycle())
        llvm::outs() << "Forming batch job from "
                     << Batch.size() << " constituents\n";
      auto const &TC = Comp.getToolChain();
      auto J = TC.constructBatchJob(Batch, NextBatchQuasiPID, Comp);
      if (J)
        Batches.push_back(Comp.addJob(std::move(J)));
    }

    /// Build a vector of partition indices, one per Job: the i'th index says
    /// which batch of the partition the i'th Job will be assigned to. If we are
    /// shuffling due to -driver-batch-seed, the returned indices will not be
    /// arranged in contiguous runs. We shuffle partition-indices here, not
    /// elements themselves, to preserve the invariant that each batch is a
    /// subsequence of the full set of inputs, not just a subset.
    std::vector<size_t>
    assignJobsToPartitions(size_t PartitionSize,
                           size_t NumJobs) {
      size_t Remainder = NumJobs % PartitionSize;
      size_t TargetSize = NumJobs / PartitionSize;
      std::vector<size_t> PartitionIndex;
      PartitionIndex.reserve(NumJobs);
      for (size_t P = 0; P < PartitionSize; ++P) {
        // Spread remainder evenly across partitions by adding 1 to the target
        // size of the first Remainder of them.
        size_t FillCount = TargetSize + ((P < Remainder) ? 1 : 0);
        std::fill_n(std::back_inserter(PartitionIndex), FillCount, P);
      }
      if (Comp.getBatchSeed() != 0) {
        std::minstd_rand gen(Comp.getBatchSeed());
        std::shuffle(PartitionIndex.begin(), PartitionIndex.end(), gen);
      }
      assert(PartitionIndex.size() == NumJobs);
      return PartitionIndex;
    }

    /// Create \c NumberOfParallelCommands batches and assign each job to a
    /// batch either filling each partition in order or, if seeded with a
    /// nonzero value, pseudo-randomly (but determinstically and nearly-evenly).
    void partitionIntoBatches(std::vector<const Job *> Batchable,
                              BatchPartition &Partition) {
      if (Comp.getShowJobLifecycle()) {
        llvm::outs() << "Found " << Batchable.size() << " batchable jobs\n";
        llvm::outs() << "Forming into " << Partition.size() << " batches\n";
      }

      assert(!Partition.empty());
      auto PartitionIndex = assignJobsToPartitions(Partition.size(),
                                                   Batchable.size());
      assert(PartitionIndex.size() == Batchable.size());
      auto const &TC = Comp.getToolChain();
      for_each(Batchable, PartitionIndex, [&](const Job *Cmd, size_t Idx) {
          assert(Idx < Partition.size());
          std::vector<const Job*> &P = Partition[Idx];
          if (P.empty() || TC.jobsAreBatchCombinable(Comp, P[0], Cmd)) {
            if (Comp.getShowJobLifecycle())
              llvm::outs() << "Adding " << LogJob(Cmd)
                           << " to batch " << Idx << '\n';
            P.push_back(Cmd);
          } else {
            // Strange but theoretically possible that we have a batchable job
            // that's not combinable with others; tack a new batch on for it.
            if (Comp.getShowJobLifecycle())
              llvm::outs() << "Adding " << LogJob(Cmd)
                           << " to new batch " << Partition.size() << '\n';
            Partition.push_back(std::vector<const Job*>());
            Partition.back().push_back(Cmd);
          }
        });
    }

    // Selects the number of partitions based on the user-provided batch
    // count and/or the number of parallel tasks we can run, subject to a
    // fixed per-batch safety cap, to avoid overcommitting memory.
    size_t pickNumberOfPartitions() {

      // If the user asked for something, use that.
      if (Comp.getBatchCount().hasValue())
        return Comp.getBatchCount().getValue();

      // This is a long comment to justify a simple calculation.
      //
      // Because there is a secondary "outer" build system potentially also
      // scheduling multiple drivers in parallel on separate build targets
      // -- while we, the driver, schedule our own subprocesses -- we might
      // be creating up to $NCPU^2 worth of _memory pressure_.
      //
      // Oversubscribing CPU is typically no problem these days, but
      // oversubscribing memory can lead to paging, which on modern systems
      // is quite bad.
      //
      // In practice, $NCPU^2 processes doesn't _quite_ happen: as core
      // count rises, it usually exceeds the number of large targets
      // without any dependencies between them (which are the only thing we
      // have to worry about): you might have (say) 2 large independent
      // modules * 2 architectures, but that's only an $NTARGET value of 4,
      // which is much less than $NCPU if you're on a 24 or 36-way machine.
      //
      //  So the actual number of concurrent processes is:
      //
      //     NCONCUR := $NCPU * min($NCPU, $NTARGET)
      //
      // Empirically, a frontend uses about 512kb RAM per non-primary file
      // and about 10mb per primary. The number of non-primaries per
      // process is a constant in a given module, but the number of
      // primaries -- the "batch size" -- is inversely proportional to the
      // batch count (default: $NCPU). As a result, the memory pressure
      // we can expect is:
      //
      //  $NCONCUR * (($NONPRIMARYMEM * $NFILE) +
      //              ($PRIMARYMEM * ($NFILE/$NCPU)))
      //
      // If we tabulate this across some plausible values, we see
      // unfortunate memory-pressure results:
      //
      //                          $NFILE
      //                  +---------------------
      //  $NTARGET $NCPU  |  100    500    1000
      //  ----------------+---------------------
      //     2        2   |  2gb   11gb    22gb
      //     4        4   |  4gb   24gb    48gb
      //     4        8   |  5gb   28gb    56gb
      //     4       16   |  7gb   36gb    72gb
      //     4       36   | 11gb   56gb   112gb
      //
      // As it happens, the lower parts of the table are dominated by
      // number of processes rather than the files-per-batch (the batches
      // are already quite small due to the high core count) and the left
      // side of the table is dealing with modules too small to worry
      // about. But the middle and upper-right quadrant is problematic: 4
      // and 8 core machines do not typically have 24-48gb of RAM, it'd be
      // nice not to page on them when building a 4-target project with
      // 500-file modules.
      //
      // Turns we can do that if we just cap the batch size statically at,
      // say, 25 files per batch, we get a better formula:
      //
      //  $NCONCUR * (($NONPRIMARYMEM * $NFILE) +
      //              ($PRIMARYMEM * min(25, ($NFILE/$NCPU))))
      //
      //                          $NFILE
      //                  +---------------------
      //  $NTARGET $NCPU  |  100    500    1000
      //  ----------------+---------------------
      //     2        2   |  1gb    2gb     3gb
      //     4        4   |  4gb    8gb    12gb
      //     4        8   |  5gb   16gb    24gb
      //     4       16   |  7gb   32gb    48gb
      //     4       36   | 11gb   56gb   108gb
      //
      // This means that the "performance win" of batch mode diminishes
      // slightly: the batching factor in the equation drops from
      // ($NFILE/$NCPU) to min(25, $NFILE/$NCPU). In practice this seems to
      // not cost too much: the additional factor in number of subprocesses
      // run is the following:
      //
      //                          $NFILE
      //                  +---------------------
      //  $NTARGET $NCPU  |  100    500    1000
      //  ----------------+---------------------
      //     2        2   |  2x    10x      20x
      //     4        4   |   -     5x      10x
      //     4        8   |   -   2.5x       5x
      //     4       16   |   -  1.25x     2.5x
      //     4       36   |   -      -     1.1x
      //
      // Where - means "no difference" because the batches were already
      // smaller than 25.
      //
      // Even in the worst case here, the 1000-file module on 2-core
      // machine is being built with only 40 subprocesses, rather than the
      // pre-batch-mode 1000. I.e. it's still running 96% fewer
      // subprocesses than before. And significantly: it's doing so while
      // not exceeding the RAM of a typical 2-core laptop.

      size_t DefaultSizeLimit = 25;
      size_t NumTasks = TQ->getNumberOfParallelTasks();
      size_t NumFiles = PendingExecution.size();
      size_t SizeLimit = Comp.getBatchSizeLimit().getValueOr(DefaultSizeLimit);
      return std::max(NumTasks, NumFiles / SizeLimit);
    }

    /// Select jobs that are batch-combinable from \c PendingExecution, combine
    /// them together into \p BatchJob instances (also inserted into \p
    /// BatchJobs), and enqueue all \c PendingExecution jobs (whether batched or
    /// not) into the \c TaskQueue for execution.
    void formBatchJobsAndAddPendingJobsToTaskQueue() {

      // If batch mode is not enabled, just transfer the set of pending jobs to
      // the task queue, as-is.
      if (!Comp.getBatchModeEnabled()) {
        transferJobsToTaskQueue(PendingExecution, "standard");
        return;
      }

      size_t NumPartitions = pickNumberOfPartitions();
      CommandSetVector Batchable, NonBatchable;
      std::vector<const Job *> Batches;

      // Split the batchable from non-batchable pending jobs.
      getPendingBatchableJobs(Batchable, NonBatchable);

      // Partition the batchable jobs into sets.
      BatchPartition Partition(NumPartitions);
      partitionIntoBatches(Batchable.takeVector(), Partition);

      // Construct a BatchJob from each batch in the partition.
      for (auto const &Batch : Partition) {
        formBatchJobFromPartitionBatch(Batches, Batch);
      }

      PendingExecution.clear();

      // Save batches so we can locate and decompose them on task-exit.
      for (const Job *Cmd : Batches)
        BatchJobs.insert(Cmd);

      // Enqueue the resulting jobs, batched and non-batched alike.
      transferJobsToTaskQueue(Batches, "batch");
      transferJobsToTaskQueue(NonBatchable, "non-batch");
    }

    void runTaskQueueToCompletion() {
      do {
        using namespace std::placeholders;
        // Ask the TaskQueue to execute.
        if (TQ->execute(std::bind(&PerformJobsState::taskBegan, this, _1, _2),
                        std::bind(&PerformJobsState::taskFinished, this, _1, _2,
                                  _3, _4, _5, _6),
                        std::bind(&PerformJobsState::taskSignalled, this, _1,
                                  _2, _3, _4, _5, _6, _7))) {
          if (Result == EXIT_SUCCESS) {
            // FIXME: Error from task queue while Result == EXIT_SUCCESS most
            // likely means some fork/exec or posix_spawn failed; TaskQueue saw
            // "an error" at some stage before even calling us with a process
            // exit / signal (or else a poll failed); unfortunately the task
            // causing it was dropped on the floor and we have no way to recover
            // it here, so we report a very poor, generic error.
            Comp.getDiags().diagnose(SourceLoc(),
                                     diag::error_unable_to_execute_command,
                                     "<unknown>");
            Result = -2;
            AnyAbnormalExit = true;
            return;
          }
        }

        // Returning without error from TaskQueue::execute should mean either an
        // empty TaskQueue or a failed subprocess.
        assert(!(Result == 0 && TQ->hasRemainingTasks()));

        // Task-exit callbacks from TaskQueue::execute may have unblocked jobs,
        // which means there might be PendingExecution jobs to enqueue here. If
        // there are, we need to continue trying to make progress on the
        // TaskQueue before we start marking deferred jobs as skipped, below.
        if (!PendingExecution.empty() && Result == 0) {
          formBatchJobsAndAddPendingJobsToTaskQueue();
          continue;
        }

        // If we got here, all the queued and pending work we know about is
        // done; mark anything still in deferred state as skipped.
        for (const Job *Cmd : DeferredCommands) {
          if (Comp.getOutputLevel() == OutputLevel::Parseable) {
            // Provide output indicating this command was skipped if parseable
            // output was requested.
            parseable_output::emitSkippedMessage(llvm::errs(), *Cmd);
          }
          ScheduledCommands.insert(Cmd);
          markFinished(Cmd, /*Skipped=*/true);
        }
        DeferredCommands.clear();

        // It's possible that by marking some jobs as skipped, we unblocked
        // some jobs and thus have entries in PendingExecution again; push
        // those through to the TaskQueue.
        formBatchJobsAndAddPendingJobsToTaskQueue();

        // If we added jobs to the TaskQueue, and we are not in an error state,
        // we want to give the TaskQueue another run.
      } while (Result == 0 && TQ->hasRemainingTasks());
    }

    template <typename DependencyGraphT>
    void checkUnfinishedJobs(DependencyGraphT &DepGraph) {
      if (Result == 0) {
        assert(BlockingCommands.empty() &&
               "some blocking commands never finished properly");
      } else {
        // Make sure we record any files that still need to be rebuilt.
        for (const Job *Cmd : Comp.getJobs()) {
          // Skip files that don't use dependency analysis.
          bool shouldHaveOutput = false;
          file_types::forEachIncrementalOutputType(
              [&](const file_types::ID type) {
                shouldHaveOutput |=
                    !Cmd->getOutput().getAdditionalOutputForType(type).empty();
              });
          if (!shouldHaveOutput)
            continue;

          // Don't worry about commands that finished or weren't going to run.
          if (FinishedCommands.count(Cmd))
            continue;
          if (!ScheduledCommands.count(Cmd))
            continue;

          bool isCascading = true;
          if (Comp.getIncrementalBuildEnabled())
            isCascading = DepGraph.isMarked(Cmd);
          UnfinishedCommands.insert({Cmd, isCascading});
        }
      }
    }

  public:
    void populateInputInfoMap(InputInfoMap &inputs) const {
      for (auto &entry : UnfinishedCommands) {
        for (auto *action : entry.first->getSource().getInputs()) {
          auto inputFile = dyn_cast<InputAction>(action);
          if (!inputFile)
            continue;

          CompileJobAction::InputInfo info;
          info.previousModTime = entry.first->getInputModTime();
          info.status = entry.second ?
            CompileJobAction::InputInfo::NeedsCascadingBuild :
            CompileJobAction::InputInfo::NeedsNonCascadingBuild;
          inputs[&inputFile->getInputArg()] = info;
        }
      }

      for (const Job *entry : FinishedCommands) {
        const auto *compileAction = dyn_cast<CompileJobAction>(&entry->getSource());
        if (!compileAction)
          continue;

        for (auto *action : compileAction->getInputs()) {
          auto inputFile = dyn_cast<InputAction>(action);
          if (!inputFile)
            continue;

          CompileJobAction::InputInfo info;
          info.previousModTime = entry->getInputModTime();
          info.status = CompileJobAction::InputInfo::UpToDate;
          inputs[&inputFile->getInputArg()] = info;
        }
      }

      // Sort the entries by input order.
      static_assert(IsTriviallyCopyable<CompileJobAction::InputInfo>::value,
                    "llvm::array_pod_sort relies on trivially-copyable data");
      using InputInfoEntry = std::decay<decltype(inputs.front())>::type;
      llvm::array_pod_sort(inputs.begin(), inputs.end(),
                           [](const InputInfoEntry *lhs,
                              const InputInfoEntry *rhs) -> int {
                             auto lhsIndex = lhs->first->getIndex();
                             auto rhsIndex = rhs->first->getIndex();
                             return (lhsIndex < rhsIndex) ? -1 : (lhsIndex > rhsIndex) ? 1 : 0;
                           });
    }

    int getResult() {
      if (Result == 0)
        Result = Comp.getDiags().hadAnyError();
      return Result;
    }

    bool hadAnyAbnormalExit() {
      return AnyAbnormalExit;
    }
  };
} // namespace driver
} // namespace swift

Compilation::~Compilation() = default;

Job *Compilation::addJob(std::unique_ptr<Job> J) {
  Job *result = J.get();
  Jobs.emplace_back(std::move(J));
  return result;
}

static void checkForOutOfDateInputs(DiagnosticEngine &diags,
                                    const InputInfoMap &inputs) {
  for (const auto &inputPair : inputs) {
    auto recordedModTime = inputPair.second.previousModTime;
    if (recordedModTime == llvm::sys::TimePoint<>::max())
      continue;

    const char *input = inputPair.first->getValue();

    llvm::sys::fs::file_status inputStatus;
    if (auto statError = llvm::sys::fs::status(input, inputStatus)) {
      diags.diagnose(SourceLoc(), diag::warn_cannot_stat_input,
                     llvm::sys::path::filename(input), statError.message());
      continue;
    }

    if (recordedModTime != inputStatus.getLastModificationTime()) {
      diags.diagnose(SourceLoc(), diag::error_input_changed_during_build,
                     llvm::sys::path::filename(input));
    }
  }
}

static void writeCompilationRecord(StringRef path, StringRef argsHash,
                                   llvm::sys::TimePoint<> buildTime,
                                   const InputInfoMap &inputs) {
  // Before writing to the dependencies file path, preserve any previous file
  // that may have been there. No error handling -- this is just a nicety, it
  // doesn't matter if it fails.
  llvm::sys::fs::rename(path, path + "~");

  std::error_code error;
  llvm::raw_fd_ostream out(path, error, llvm::sys::fs::F_None);
  if (out.has_error()) {
    // FIXME: How should we report this error?
    out.clear_error();
    return;
  }

  auto writeTimeValue = [](llvm::raw_ostream &out,
                           llvm::sys::TimePoint<> time) {
    using namespace std::chrono;
    auto secs = time_point_cast<seconds>(time);
    time -= secs.time_since_epoch(); // remainder in nanoseconds
    out << "[" << secs.time_since_epoch().count()
        << ", " << time.time_since_epoch().count() << "]";
  };

  using compilation_record::TopLevelKey;
  // NB: We calculate effective version from getCurrentLanguageVersion()
  // here because any -swift-version argument is handled in the
  // argsHash that follows.
  out << compilation_record::getName(TopLevelKey::Version) << ": \""
      << llvm::yaml::escape(version::getSwiftFullVersion(
                              swift::version::Version::getCurrentLanguageVersion()))
      << "\"\n";
  out << compilation_record::getName(TopLevelKey::Options) << ": \""
      << llvm::yaml::escape(argsHash) << "\"\n";
  out << compilation_record::getName(TopLevelKey::BuildTime) << ": ";
  writeTimeValue(out, buildTime);
  out << "\n";
  out << compilation_record::getName(TopLevelKey::Inputs) << ":\n";

  for (auto &entry : inputs) {
    out << "  \"" << llvm::yaml::escape(entry.first->getValue()) << "\": ";

    using compilation_record::getIdentifierForInputInfoStatus;
    auto Name = getIdentifierForInputInfoStatus(entry.second.status);
    if (!Name.empty()) {
      out << Name << " ";
    }

    writeTimeValue(out, entry.second.previousModTime);
    out << "\n";
  }
}

static bool writeFilelistIfNecessary(const Job *job, const ArgList &args,
                                     DiagnosticEngine &diags) {
  bool ok = true;
  for (const FilelistInfo &filelistInfo : job->getFilelistInfos()) {
    if (filelistInfo.path.empty())
      return true;

    std::error_code error;
    llvm::raw_fd_ostream out(filelistInfo.path, error, llvm::sys::fs::F_None);
    if (out.has_error()) {
      out.clear_error();
      diags.diagnose(SourceLoc(), diag::error_unable_to_make_temporary_file,
                     error.message());
      ok = false;
      continue;
    }

    switch (filelistInfo.whichFiles) {
    case FilelistInfo::WhichFiles::Input:
      // FIXME: Duplicated from ToolChains.cpp.
      for (const Job *input : job->getInputs()) {
        const CommandOutput &outputInfo = input->getOutput();
        if (outputInfo.getPrimaryOutputType() == filelistInfo.type) {
          for (auto &output : outputInfo.getPrimaryOutputFilenames())
            out << output << "\n";
        } else {
          auto output = outputInfo.getAnyOutputForType(filelistInfo.type);
          if (!output.empty())
            out << output << "\n";
        }
      }
      break;
    case FilelistInfo::WhichFiles::PrimaryInputs:
      // Ensure that -index-file-path works in conjunction with
      // -driver-use-filelists. It needs to be the only primary.
      if (Arg *A = args.getLastArg(options::OPT_index_file_path))
        out << A->getValue() << "\n";
      else {
        // The normal case for non-single-compile jobs.
        for (const Action *A : job->getSource().getInputs()) {
          // A could be a GeneratePCHJobAction
          if (!isa<InputAction>(A))
            continue;
          const auto *IA = cast<InputAction>(A);
          out << IA->getInputArg().getValue() << "\n";
        }
      }
      break;
    case FilelistInfo::WhichFiles::Output: {
      const CommandOutput &outputInfo = job->getOutput();
      assert(outputInfo.getPrimaryOutputType() == filelistInfo.type);
      for (auto &output : outputInfo.getPrimaryOutputFilenames())
        out << output << "\n";
      break;
    }
    case FilelistInfo::WhichFiles::SupplementaryOutput:
      job->getOutput().writeOutputFileMap(out);
      break;
    }
  }
  return ok;
}

int Compilation::performJobsImpl(bool &abnormalExit,
                                 std::unique_ptr<TaskQueue> &&TQ) {
  PerformJobsState State(*this, std::move(TQ));

  if (getEnableExperimentalDependencies())
    State.runJobs(State.ExpDepGraph.getValue());
  else
    State.runJobs(State.StandardDepGraph);

  if (!CompilationRecordPath.empty()) {
    InputInfoMap InputInfo;
    State.populateInputInfoMap(InputInfo);
    checkForOutOfDateInputs(Diags, InputInfo);
    writeCompilationRecord(CompilationRecordPath, ArgsHash, BuildStartTime,
                           InputInfo);

    if (OutputCompilationRecordForModuleOnlyBuild) {
      // TODO: Optimize with clonefile(2) ?
      llvm::sys::fs::copy_file(CompilationRecordPath,
                               CompilationRecordPath + "~moduleonly");
    }
  }
  if (State.ExpDepGraph.hasValue())
    assert(State.ExpDepGraph.getValue().emitDotFileAndVerify(getDiags()));
  abnormalExit = State.hadAnyAbnormalExit();
  return State.getResult();
}

int Compilation::performSingleCommand(const Job *Cmd) {
  assert(Cmd->getInputs().empty() &&
         "This can only be used to run a single command with no inputs");

  switch (Cmd->getCondition()) {
  case Job::Condition::CheckDependencies:
    return 0;
  case Job::Condition::RunWithoutCascading:
  case Job::Condition::Always:
  case Job::Condition::NewlyAdded:
    break;
  }

  if (!writeFilelistIfNecessary(Cmd, *TranslatedArgs.get(), Diags))
    return 1;

  switch (Level) {
  case OutputLevel::Normal:
  case OutputLevel::Parseable:
    break;
  case OutputLevel::PrintJobs:
    Cmd->printCommandLineAndEnvironment(llvm::outs());
    return 0;
  case OutputLevel::Verbose:
    Cmd->printCommandLine(llvm::errs());
    break;
  }

  SmallVector<const char *, 128> Argv;
  Argv.push_back(Cmd->getExecutable());
  Argv.append(Cmd->getArguments().begin(), Cmd->getArguments().end());
  Argv.push_back(nullptr);

  const char *ExecPath = Cmd->getExecutable();
  const char **argv = Argv.data();

  for (auto &envPair : Cmd->getExtraEnvironment()) {
#if defined(_MSC_VER)
    int envResult =_putenv_s(envPair.first, envPair.second);
#else
    int envResult = setenv(envPair.first, envPair.second, /*replacing=*/true);
#endif
    assert(envResult == 0 &&
          "expected environment variable to be set successfully");
    // Bail out early in release builds.
    if (envResult != 0) {
      return envResult;
    }
  }

  return ExecuteInPlace(ExecPath, argv);
}

static bool writeAllSourcesFile(DiagnosticEngine &diags, StringRef path,
                                ArrayRef<InputPair> inputFiles) {
  std::error_code error;
  llvm::raw_fd_ostream out(path, error, llvm::sys::fs::F_None);
  if (out.has_error()) {
    out.clear_error();
    diags.diagnose(SourceLoc(), diag::error_unable_to_make_temporary_file,
                   error.message());
    return false;
  }

  for (auto inputPair : inputFiles) {
    if (!file_types::isPartOfSwiftCompilation(inputPair.first))
      continue;
    out << inputPair.second->getValue() << "\n";
  }

  return true;
}

int Compilation::performJobs(std::unique_ptr<TaskQueue> &&TQ) {
  if (AllSourceFilesPath)
    if (!writeAllSourcesFile(Diags, AllSourceFilesPath, getInputFiles()))
      return EXIT_FAILURE;

  // If we don't have to do any cleanup work, just exec the subprocess.
  if (Level < OutputLevel::Parseable &&
      !ShowDriverTimeCompilation &&
      (SaveTemps || TempFilePaths.empty()) &&
      CompilationRecordPath.empty() &&
      Jobs.size() == 1) {
    return performSingleCommand(Jobs.front().get());
  }

  if (!TaskQueue::supportsParallelExecution() && TQ->getNumberOfParallelTasks() > 1) {
    Diags.diagnose(SourceLoc(), diag::warning_parallel_execution_not_supported);
  }

  bool abnormalExit;
  int result = performJobsImpl(abnormalExit, std::move(TQ));

  if (!SaveTemps) {
    for (const auto &pathPair : TempFilePaths) {
      if (!abnormalExit || pathPair.getValue() == PreserveOnSignal::No)
        (void)llvm::sys::fs::remove(pathPair.getKey());
    }
  }
  if (Stats)
    Stats->noteCurrentProcessExitStatus(result);
  return result;
}

const char *Compilation::getAllSourcesPath() const {
  if (!AllSourceFilesPath) {
    SmallString<128> Buffer;
    std::error_code EC =
        llvm::sys::fs::createTemporaryFile("sources", "", Buffer);
    if (EC) {
      // Use the constructor that prints both the error code and the
      // description.
      // FIXME: This should not take down the entire process.
      auto error = llvm::make_error<llvm::StringError>(
          EC,
          "- unable to create list of input sources");
      llvm::report_fatal_error(std::move(error));
    }
    auto *mutableThis = const_cast<Compilation *>(this);
    mutableThis->addTemporaryFile(Buffer.str(), PreserveOnSignal::Yes);
    mutableThis->AllSourceFilesPath = getArgs().MakeArgString(Buffer);
  }
  return AllSourceFilesPath;
}