| using System.Collections.Generic; |
| using System.Linq; |
| using Unity.MLAgents.Inference.Utils; |
| using Unity.MLAgents.Actuators; |
| using Unity.Barracuda; |
| using UnityEngine; |
|
|
| namespace Unity.MLAgents.Inference |
| { |
| |
| |
| |
| |
/// <summary>
/// Applier that copies the rows of a model output tensor into the continuous
/// actions stored for each agent.
/// </summary>
internal class ContinuousActionOutputApplier : TensorApplier.IApplier
{
    // Spec used to allocate a fresh ActionBuffers when an agent's stored buffers are empty.
    readonly ActionSpec m_ActionSpec;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="actionSpec">Action spec passed to <c>new ActionBuffers(...)</c>
    /// whenever an agent's stored buffers report <c>IsEmpty()</c>.</param>
    public ContinuousActionOutputApplier(ActionSpec actionSpec)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes row <c>i</c> of <paramref name="tensorProxy"/> into the continuous actions
    /// of the agent whose id is <c>actionIds[i]</c>.
    /// </summary>
    /// <param name="tensorProxy">Tensor read as <c>data[row, column]</c>; the number of
    /// columns copied is the last entry of its <c>shape</c>.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the tensor row used for it.</param>
    /// <param name="lastActions">Per-agent action buffers. Agents without an entry are skipped
    /// (their row is simply not read); empty buffers are replaced by newly allocated ones.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var row = 0; row < actionIds.Count; row++)
        {
            var agentId = actionIds[row];

            // Single dictionary lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                // Allocate storage from the spec and store it back so the dictionary sees it.
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var continuousBuffer = actionBuffer.ContinuousActions;
            for (var j = 0; j < actionSize; j++)
            {
                continuousBuffer[j] = tensorProxy.data[row, j];
            }
        }
    }
}
|
|
| |
| |
| |
/// <summary>
/// Applier that copies the rows of a model output tensor into the discrete
/// actions stored for each agent, converting each value to <c>int</c>.
/// </summary>
internal class DiscreteActionOutputApplier : TensorApplier.IApplier
{
    // Spec used to allocate a fresh ActionBuffers when an agent's stored buffers are empty.
    readonly ActionSpec m_ActionSpec;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="actionSpec">Action spec passed to <c>new ActionBuffers(...)</c>
    /// whenever an agent's stored buffers report <c>IsEmpty()</c>.</param>
    /// <param name="seed">Accepted but not used by this class.</param>
    /// <param name="allocator">Accepted but not used by this class.</param>
    public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes row <c>i</c> of <paramref name="tensorProxy"/> into the discrete actions
    /// of the agent whose id is <c>actionIds[i]</c>.
    /// </summary>
    /// <param name="tensorProxy">Tensor read as <c>data[row, column]</c>; the number of
    /// columns copied is the last entry of its <c>shape</c>. Values are cast with <c>(int)</c>.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the tensor row used for it.</param>
    /// <param name="lastActions">Per-agent action buffers. Agents without an entry are skipped
    /// (their row is simply not read); empty buffers are replaced by newly allocated ones.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var row = 0; row < actionIds.Count; row++)
        {
            var agentId = actionIds[row];

            // Single dictionary lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                // Allocate storage from the spec and store it back so the dictionary sees it.
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var discreteBuffer = actionBuffer.DiscreteActions;
            for (var j = 0; j < actionSize; j++)
            {
                discreteBuffer[j] = (int)tensorProxy.data[row, j];
            }
        }
    }
}
|
|
|
|
| |
| |
| |
| |
/// <summary>
/// Applier that, for every discrete action branch, builds an unnormalized cumulative
/// distribution from the tensor values of that branch and stores the index returned by
/// <c>Multinomial.Sample</c> as the agent's discrete action.
/// </summary>
internal class LegacyDiscreteActionOutputApplier : TensorApplier.IApplier
{
    // Size of each discrete branch (actionSpec.BranchSizes).
    readonly int[] m_ActionSize;
    readonly Multinomial m_Multinomial;
    // Spec used to allocate a fresh ActionBuffers when an agent's stored buffers are empty.
    readonly ActionSpec m_ActionSpec;
    // Result of Utilities.CumSum over the branch sizes; entry j is used as the column
    // offset of branch j inside a tensor row.
    readonly int[] m_StartActionIndices;
    // Scratch buffer reused by ComputeCdf for every branch of every agent.
    readonly float[] m_CdfBuffer;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="actionSpec">Action spec; its <c>BranchSizes</c> define the branches.</param>
    /// <param name="seed">Seed passed to the <c>Multinomial</c> sampler.</param>
    /// <param name="allocator">Accepted but not used by this class.</param>
    public LegacyDiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSize = actionSpec.BranchSizes;
        m_Multinomial = new Multinomial(seed);
        m_ActionSpec = actionSpec;
        m_StartActionIndices = Utilities.CumSum(m_ActionSize);

        // One scratch buffer sized for the largest branch is enough, because the
        // branches are processed one at a time.
        var largestBranch = Mathf.Max(m_ActionSize);
        m_CdfBuffer = new float[largestBranch];
    }

    /// <summary>
    /// For each agent id at position <c>i</c> of <paramref name="actionIds"/>, samples one
    /// discrete action per branch from row <c>i</c> of <paramref name="tensorProxy"/>.
    /// </summary>
    /// <param name="tensorProxy">Tensor read as <c>data[row, column]</c>; each branch occupies
    /// a contiguous run of columns starting at its offset in the cumulative-sum table.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the tensor row used for it.</param>
    /// <param name="lastActions">Per-agent action buffers. Agents without an entry are skipped
    /// (their row is simply not read); empty buffers are replaced by newly allocated ones.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        for (var row = 0; row < actionIds.Count; row++)
        {
            var agentId = actionIds[row];

            // Single dictionary lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                // Allocate storage from the spec and store it back so the dictionary sees it.
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var discreteBuffer = actionBuffer.DiscreteActions;
            for (var branch = 0; branch < m_ActionSize.Length; branch++)
            {
                // ComputeCdf fills m_CdfBuffer[0 .. branchSize), which the sampler then reads.
                ComputeCdf(tensorProxy, row, m_StartActionIndices[branch], m_ActionSize[branch]);
                discreteBuffer[branch] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[branch]);
            }
        }
    }

    /// <summary>
    /// Fills the first <paramref name="branchSize"/> entries of the internal CDF buffer with
    /// the running sum of <c>exp(value - max)</c> over one branch of one tensor row.
    /// The result is not normalized: the last written entry is the total sum, not 1.
    /// </summary>
    /// <param name="logProbs">Tensor read as <c>data[batch, column]</c>.</param>
    /// <param name="batch">Row of the tensor to read.</param>
    /// <param name="channelOffset">Column at which the branch starts.</param>
    /// <param name="branchSize">Number of columns belonging to the branch.</param>
    internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
    {
        // First pass: largest value within the branch.
        var maxProb = float.NegativeInfinity;
        for (var cls = 0; cls < branchSize; ++cls)
        {
            maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
        }

        // Second pass: accumulate exp(value - max). Shifting by the maximum keeps every
        // exponent at or below zero, so each term is at most 1.
        var sumProb = 0.0f;
        for (var cls = 0; cls < branchSize; ++cls)
        {
            sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
            m_CdfBuffer[cls] = sumProb;
        }
    }
}
|
|
| |
| |
| |
| |
/// <summary>
/// Applier that copies the rows of a model output tensor into a per-agent
/// list of floats kept in a shared dictionary.
/// </summary>
internal class MemoryOutputApplier : TensorApplier.IApplier
{
    // Agent id -> stored values; the dictionary instance is supplied by the caller.
    Dictionary<int, List<float>> m_Memories;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="memories">Dictionary that <see cref="Apply"/> reads from and writes to.</param>
    public MemoryOutputApplier(Dictionary<int, List<float>> memories)
    {
        m_Memories = memories;
    }

    /// <summary>
    /// Stores row <c>i</c> of <paramref name="tensorProxy"/> as the memory of the agent
    /// whose id is <c>actionIds[i]</c>.
    /// </summary>
    /// <param name="tensorProxy">Tensor read as <c>data[row, 0, column, 0]</c>; the number of
    /// columns copied is <c>data.width</c>.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the tensor row used for it.</param>
    /// <param name="lastActions">Not used by this applier.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var width = tensorProxy.data.width;
        for (var row = 0; row < actionIds.Count; row++)
        {
            var id = actionIds[row];

            // Reuse the stored list only when it exists and can hold a full row;
            // otherwise start over from a zero-filled list of exactly `width` entries.
            List<float> stored;
            var reusable = m_Memories.TryGetValue(id, out stored) && stored.Count >= width;
            if (!reusable)
            {
                stored = new List<float>(Enumerable.Repeat(0f, width));
            }

            for (var col = 0; col < width; col++)
            {
                stored[col] = tensorProxy.data[row, 0, col, 0];
            }

            // Written back unconditionally so a newly created list is registered too.
            m_Memories[id] = stored;
        }
    }
}
| } |
|
|