// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/iterator.h"
#include "src/base/optional.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by decreasing total latency so that we can
  // quickly find the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
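  // The ready list is sorted by decreasing total latency, so the first node
  // whose operands are ready is the one on the longest remaining path.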
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

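// total_latency_ and start_cycle_ start out as -1; they are filled in later by
// ComputeTotalLatencies() and by Schedule() as predecessors get scheduled.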
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

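// Adding a successor edge also bumps the successor's unscheduled-predecessor
// count; Schedule() drops it again once the predecessor has been emitted.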
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
  }
}

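// Starting a block assumes that the per-block state was fully reset by the
// previous Schedule() call; the DCHECKs below verify this.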
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

void InstructionScheduler::AddInstruction(Instruction* instr) {
  if (IsBarrier(instr)) {
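    // Barriers flush the graph: everything accumulated so far is scheduled
    // first, then the barrier itself is emitted in program order.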
    if (FLAG_turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

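  // Live-in register markers are chained together so they stay in order at the
  // start of the block; every other instruction depends on the last marker.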
  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with respect to side-effect
      // instructions, but independent loads can be reordered with respect to
      // each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

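    // Advance the cycle count even when nothing was scheduled: with the
    // critical-path queue, a ready node may only become schedulable once its
    // start cycle has been reached.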
    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}

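// Classify architecture-independent opcodes into scheduler flags (barrier,
// side effect, load, or none); everything else is handled by the target
// backend.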
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name, this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack pointer issues.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchAbortCSAAssert:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

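    // All target-specific opcodes are classified by the architecture-specific
    // backend.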
#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

void InstructionScheduler::ComputeTotalLatencies() {
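  // Traverse the graph in reverse order: successors always appear later in
  // graph_ than their predecessors, so their total latencies are already
  // known.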
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8