Implementation of DReCon in Unity 2022 LTS
forked from:
https://github.com/joanllobera/marathon-envs
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
82 lines
2.9 KiB
82 lines
2.9 KiB
10 months ago
|
using System;
|
||
|
using System.Collections;
|
||
|
using System.Collections.Generic;
|
||
|
using System.Linq;
|
||
|
using UnityEngine;
|
||
|
using Unity.MLAgents;
|
||
|
using Unity.MLAgents.Actuators;
|
||
|
using Unity.MLAgents.Sensors;
|
||
|
|
||
|
/// <summary>
/// Walker agent built on <see cref="MarathonAgent"/>. At the start of every episode it
/// installs its reward, termination and observation callbacks and registers the
/// rigidbodies it tracks in <c>BodyParts</c>.
/// </summary>
public class DeepMindWalkerAgent : MarathonAgent
{
    /// <summary>
    /// Runs the base episode reset, wires up the step-reward, termination and
    /// observation callbacks, fills <c>BodyParts</c> with the child rigidbodies
    /// looked up by name, and finally calls <c>SetupBodyParts</c>.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        base.OnEpisodeBegin();

        // Set this to true to show the monitor while training.
        //Monitor.SetActive(true);

        StepRewardFunction = StepRewardWalker106;
        TerminateFunction = TerminateOnNonFootHitTerrain;
        ObservationsFunction = ObservationsDefault;

        // Query the hierarchy once and reuse the result for every lookup below,
        // instead of re-walking the children (and allocating a new array) per body part.
        Rigidbody[] rigidbodies = GetComponentsInChildren<Rigidbody>();

        // Note: the "pelvis" key is filled from the rigidbody named "torso".
        // FirstOrDefault yields null when no child rigidbody carries the requested name.
        BodyParts["pelvis"] = rigidbodies.FirstOrDefault(x => x.name == "torso");
        BodyParts["left_thigh"] = rigidbodies.FirstOrDefault(x => x.name == "left_thigh");
        BodyParts["right_thigh"] = rigidbodies.FirstOrDefault(x => x.name == "right_thigh");
        BodyParts["right_foot"] = rigidbodies.FirstOrDefault(x => x.name == "right_foot");
        BodyParts["left_foot"] = rigidbodies.FirstOrDefault(x => x.name == "left_foot");
        SetupBodyParts();
    }

    /// <summary>
    /// Writes the agent's observations into <paramref name="sensor"/>, in this order:
    /// normalized pelvis velocity, pelvis forward and up vectors, <c>SensorIsInTouch</c>,
    /// every entry of <c>JointRotations</c>, <c>JointVelocity</c>, and the normalized
    /// heights (y) of the left and right foot.
    /// </summary>
    /// <param name="sensor">Vector sensor that receives the observations.</param>
    private void ObservationsDefault(VectorSensor sensor)
    {
        var pelvis = BodyParts["pelvis"];
        Vector3 normalizedVelocity = this.GetNormalizedVelocity(pelvis.velocity);
        sensor.AddObservation(normalizedVelocity);
        sensor.AddObservation(pelvis.transform.forward); // gyroscope
        sensor.AddObservation(pelvis.transform.up);

        sensor.AddObservation(SensorIsInTouch);
        JointRotations.ForEach(x => sensor.AddObservation(x));
        sensor.AddObservation(JointVelocity);

        // Only the vertical component of each foot position is observed.
        float leftFootHeight = this.GetNormalizedPosition(BodyParts["left_foot"].transform.position).y;
        float rightFootHeight = this.GetNormalizedPosition(BodyParts["right_foot"].transform.position).y;
        sensor.AddObservation(new[] { leftFootHeight, rightFootHeight });
    }

    /// <summary>
    /// Computes the per-step reward as the sum of four weighted terms: forward (x)
    /// pelvis velocity, a pelvis direction bonus, a height term and an effort term.
    /// </summary>
    /// <returns>The sum of the weighted velocity, direction, height and effort terms.</returns>
    private float StepRewardWalker106()
    {
        // Weights applied to the individual reward terms.
        const float postureWeight = 0.05f;   // height and direction terms
        const float velocityWeight = 0.4f;   // forward velocity term
        const float maxEffortWeight = 0.4f;  // upper bound on the effort multiplier

        float heightScore = Mathf.Clamp(1f - GetHeightPenality(1.1f), 0f, 1f);
        // NOTE(review): named "upright" but measured against Vector3.forward — confirm
        // this matches the torso's local axis convention.
        float uprightBonus = Mathf.Clamp(GetDirectionBonus("pelvis", Vector3.forward, 1f), 0f, 1f);
        float velocity = Mathf.Clamp(GetNormalizedVelocity("pelvis").x, 0f, 1f);
        float effort = 1f - GetEffortNormalized();

        //if (ShowMonitor)
        //{
        //    var hist = new[] {velocity, uprightBonus, heightPenality, effort}.ToList();
        //    Monitor.Log("rewardHist", hist.ToArray(), displayType: Monitor.DisplayType.Independent);
        //}

        heightScore *= postureWeight;
        uprightBonus *= postureWeight;
        velocity *= velocityWeight;

        // The effort term is scaled by the weighted velocity, capped at maxEffortWeight,
        // so the effort term contributes nothing while the agent is not moving forward.
        if (velocity >= maxEffortWeight)
        {
            effort *= maxEffortWeight;
        }
        else
        {
            effort *= velocity;
        }

        var reward = velocity
                     + uprightBonus
                     + heightScore
                     + effort;

        return reward;
    }
}
|