using System; using System.Collections; using System.Collections.Generic; using System.Linq; using UnityEngine; using Unity.MLAgents; using Unity.MLAgents.Actuators; using Unity.MLAgents.Sensors; public class DeepMindHopperAgent : MarathonAgent { public List<float> RewardHackingVector; public override void OnEpisodeBegin() { base.OnEpisodeBegin(); // set to true this to show monitor while training //Monitor.SetActive(true); StepRewardFunction = StepRewardHopper101; TerminateFunction = TerminateOnNonFootHitTerrain; ObservationsFunction = ObservationsDefault; BodyParts["pelvis"] = GetComponentsInChildren<Rigidbody>().FirstOrDefault(x => x.name == "torso"); BodyParts["foot"] = GetComponentsInChildren<Rigidbody>().FirstOrDefault(x => x.name == "foot"); SetupBodyParts(); } void ObservationsDefault(VectorSensor sensor) { if (ShowMonitor) { } var pelvis = BodyParts["pelvis"]; Vector3 normalizedVelocity = this.GetNormalizedVelocity(pelvis.velocity); sensor.AddObservation(normalizedVelocity); sensor.AddObservation(pelvis.transform.forward); // gyroscope sensor.AddObservation(pelvis.transform.up); sensor.AddObservation(SensorIsInTouch); JointRotations.ForEach(x => sensor.AddObservation(x)); sensor.AddObservation(JointVelocity); var foot = BodyParts["foot"]; Vector3 normalizedFootPosition = this.GetNormalizedPosition(foot.transform.position); sensor.AddObservation(normalizedFootPosition.y); } float GetRewardOnEpisodeComplete() { return FocalPoint.transform.position.x; } void UpdateRewardHackingVector() { // float uprightBonus = GetForwardBonus("pelvis"); float uprightBonus = GetDirectionBonus("pelvis", Vector3.forward, 1f); uprightBonus = Mathf.Clamp(uprightBonus, 0f, 1f); float velocity = Mathf.Clamp(GetNormalizedVelocity("pelvis").x, 0f, 1f); float position = Mathf.Clamp(GetNormalizedPosition("pelvis").x, 0f, 1f); float effort = 1f - GetEffortNormalized(); if (RewardHackingVector?.Count == 0) RewardHackingVector = Enumerable.Range(0, 6).Select(x => 0f).ToList(); RewardHackingVector[0] = velocity; RewardHackingVector[1] = position; RewardHackingVector[2] = effort; RewardHackingVector[3] = uprightBonus; } float StepRewardHopper101() { UpdateRewardHackingVector(); float uprightBonus = GetDirectionBonus("pelvis", Vector3.forward, 1f); uprightBonus = Mathf.Clamp(uprightBonus, 0f, 1f); float velocity = Mathf.Clamp(GetNormalizedVelocity("pelvis").x, 0f, 1f); // float position = Mathf.Clamp(GetNormalizedPosition("pelvis").x, 0f, 1f); float effort = 1f - GetEffortNormalized(); uprightBonus *= 0.05f; velocity *= 0.7f; if (velocity >= .25f) effort *= 0.25f; else effort *= velocity; var reward = velocity + uprightBonus + effort; if (ShowMonitor) { //var hist = new[] {reward, velocity, uprightBonus, effort}; //Monitor.Log("rewardHist", hist, displayType: Monitor.DisplayType.Independent); } return reward; } }