//===- llvm/Orca/LockOpt.h - Definition of the LockOpt class --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
// 
//===----------------------------------------------------------------------===//
// Copyright 2013-2018 Azul Systems, Inc.  All Rights Reserved.
// http://www.azul.com
// Azul Systems is a contributor to the LLVM Team.
// Distributed under the same license terms detailed in LICENSE.TXT above.
//===----------------------------------------------------------------------===//
/// \file
/// This file provides the interface for Azul's own LockOpt pass.
/// This pass performs lock coarsening optimization.
///
//===----------------------------------------------------------------------===//

#ifndef LOCKOPT_H
#define LOCKOPT_H
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Debug.h"
#include <optional>

namespace llvm {

class AssumptionCache;
class CallBase;
class Loop;
class LoopInfo;
class ScalarEvolution;
class DominatorTree;
class formatted_raw_ostream;
class TargetTransformInfo;

// Outcome of a lock optimization. The three flags are not mutually exclusive:
//  - ChangedIR and ChangedCFG are used to decide which analyses to preserve;
//  - CoarsenedLocks decides when we stop the fixed-point iteration of lock
//    optimizations.
struct LockOptResult {
  bool ChangedIR = false;
  bool ChangedCFG = false;
  bool CoarsenedLocks = false;

  // Folds Other into this result. A flag ends up set if it was set on either
  // side; a flag that is already set is never cleared.
  void mergeResult(const LockOptResult Other) {
    if (Other.ChangedIR)
      ChangedIR = true;
    if (Other.ChangedCFG)
      ChangedCFG = true;
    if (Other.CoarsenedLocks)
      CoarsenedLocks = true;
  }
};

class LockOpt : public PassInfoMixin<LockOpt> {
public:
  // New pass manager entry point: runs the pass on F.
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

  // Same optimization, but with the analyses handed in directly instead of
  // being looked up through an analysis manager.
  // NOTE(review): AC and TTI are pointers, so presumably they may be null -
  // confirm against the definition.
  LockOptResult runImpl(Function &F, ScalarEvolution &SE, LoopInfo &LI,
                        DominatorTree &DT, AssumptionCache *AC,
                        TargetTransformInfo *TTI);

private:
  using RPOTraversal = ReversePostOrderTraversal<const Function *>;

  LockOptResult tryLockCoarsening(CallBase *CB, DominatorTree *DT,
                                  LoopInfo *LI);

  // Canonicalizes loop L for lock coarsening, modifying the CFG in the
  // process. MX is the monitor exit found in L.
  // NOTE(review): ChangedCFG is presumably set when the CFG was modified, and
  // an empty optional presumably means canonicalization failed - confirm
  // against the definition.
  std::optional<Loop *> canonicalizeLoopForCoarsening(Loop *L, LoopInfo *LI,
                                                      ScalarEvolution *SE,
                                                      DominatorTree *DT,
                                                      CallBase *MX,
                                                      bool &ChangedCFG);

  LockOptResult tryLockCoarseningInLoop(Loop *L, ScalarEvolution *SE,
                                        LoopInfo *LI, DominatorTree *DT,
                                        AssumptionCache *AC,
                                        TargetTransformInfo *TTI);

  LockOptResult tryLockCoarseningByChunkAndPeelLoop(Loop *ChunkedL,
                                                    AssumptionCache *AC,
                                                    ScalarEvolution *SE,
                                                    LoopInfo *LI,
                                                    DominatorTree *DT);

  LockOptResult eliminateNestedLocks(Function &F);

  // NOTE(review): VerifyLocks presumably turns on balanced-lock verification
  // while iterating - confirm against the definition.
  LockOptResult iterateOnFunction(Function &F, ScalarEvolution *SE,
                                  LoopInfo *LI, DominatorTree *DT,
                                  AssumptionCache *AC,
                                  TargetTransformInfo *TTI,
                                  const bool VerifyLocks);
};

// This class contains utilities that can be used to do analysis over a function
// to find locked regions. It is used by nested lock analysis and will also be
// used by verifier to check if monitors are balanced.
class LockUtils {

  friend class LockOpt;
  friend class NestedLockAnalysis;
  friend class LockStackAnnotatedWriter;
  // LockStack keeps track of the monitorenters on a particular object (which is
  // defined as MonitorObject - see below) in a basic block.
  // When we see a MonitorEnter on MonitorObject, we push it on the stack.
  // We pop the stack when there's a monitor exit on MonitorObject.
  struct LockStack {
    SmallVector<CallBase *, 4> MonitorEntersStack;
    bool equals(LockStack &Other) {
      return MonitorEntersStack == Other.MonitorEntersStack;
    }
    // Used during Lock Verification to avoid false positives.
    bool equalsIgnoreDeoptState(const LockStack &Other) const;

    void print(raw_ostream &OS);
  };

  // This is calculated per object being locked.
  struct NestedLockInfo {
    // Object for which the NestedLockInfo is calculated.
    Value *MonitorObject;
    // When performing DFA, we populate BBEndStates with the LockStack seen at a
    // particular BB.
    DenseMap<BasicBlock *, LockStack> BBEndStates;

    // This map records blocks and their assumed BBEnd States (optimistically
    // assumed through DFA) for blocks where the BBEndState is unavailable.
    // These are the backedge predecessor blocks (latches in loops).
    DenseMap<BasicBlock *, LockStack> AssumedBBEndStates;

    // Map of MonitorEnters and the calls which inspect the lock state of that
    // lock.
    DenseMap<CallBase *, SmallVector<CallBase *, 4>> CallsInspectingLockState;
    // Map of monitor enter and the set of monitor exits that make up a nested
    // locked region. We use a MapVector to deterministically remove nested
    // locks.
    MapVector<CallBase *, SmallVector<CallBase *, 4>> NestedLockRegions;

    std::optional<LockStack> getBlockEndState(const BasicBlock *BB);
    bool visitBlock(BasicBlock *BB, const bool IsVerifierMode = false);

    // Returns true if BB is a return block and if it has unbalanced monitors.
    // This function expects BBEndStates to be already calculated for BB.
    bool returnBlockHasUnbalancedMonitors(BasicBlock *BB);

    // All monitor enters on LockStack have their lock state inspected by
    // InspectingCall. We update the CallsInspectingLockState map here.
    void updateLockInspectionMap(CallBase *InspectingCall, LockStack &LS);

		// Records if the NestedLockInfo is in a conflict state (i.e. we could not
		// complete the analysis through RPOT because some block(s) failed analysis
		// during visitBlock).
		bool ConflictState;
  };

  static bool VerifyBalancedLocks(Function &F, const bool InitialCheck);
  static bool VerifyDeoptState(CallBase *CB, const LockStack BBStack, Value *CurrObjLocked);
};

class NestedLockAnalysis {
  friend class LockOpt;
  friend class LockStackAnnotatedWriter;

  using RPOTraversal = ReversePostOrderTraversal<const Function *>;

  // Performs an RPO walk over the function (using RPOT) and returns the
  // resulting NestedLockInfo for V.
  LockUtils::NestedLockInfo getNestedLockInfoForValue(Value *V,
                                                      RPOTraversal &RPOT);

  // Used by the AnnotatedWriter.
  // When LockedMoreThanOnce is false, lock stacks are calculated for all
  // monitors. NestedLockInfoMap is passed by reference; presumably it receives
  // the per-object results - confirm against the definition.
  void calculateLockStacks(
      Function &F,
      DenseMap<Value *, LockUtils::NestedLockInfo> &NestedLockInfoMap,
      const bool LockedMoreThanOnce);

public:
  // Returns the set of nested monitors through NestedMonitors. No additional
  // checks are done here on what would allow us to eliminate these monitors
  // (such as deopt state inspection support). This is used in
  // IdentifyMissedOpts.
  void getNestedMonitors(Function &F,
                         SmallVectorImpl<CallBase *> &NestedMonitors);
};

class LockStackAnnotatedWriter : public AssemblyAnnotationWriter {
  DenseMap<Value *, LockUtils::NestedLockInfo> NestedLockInfoMap;

public:
  LockStackAnnotatedWriter(NestedLockAnalysis &NLA, Function &F) {
    // Calculate lock stacks for all monitors.
    NLA.calculateLockStacks(F, NestedLockInfoMap, false /*LockedMoreThanOnce*/);
  }

  virtual void emitBasicBlockEndAnnot(const llvm::BasicBlock *,
                                      formatted_raw_ostream &) override;
};

struct LockStackPrinterPass : PassInfoMixin<LockStackPrinterPass> {
  LockStackPrinterPass() = default;

  // New pass manager entry point: runs the pass on F.
  // NOTE(review): presumably prints the lock stacks of F to OS - confirm
  // against the definition.
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);

  // Stream held by the pass; bound to dbgs().
  raw_ostream &OS = dbgs();
};

} // end namespace llvm
#endif
