This commit is contained in:
2025-11-28 23:13:44 +05:30
commit a3a8e79709
7360 changed files with 1156074 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,542 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"
#include "DyArticulationContactPrep.h"
#include "DySolverConstraintDesc.h"
#include "DySolverConstraint1D.h"
#include "DyArticulationHelper.h"
#include "PxcNpWorkUnit.h"
#include "PxsMaterialManager.h"
#include "PxsMaterialCombiner.h"
#include "DyCorrelationBuffer.h"
#include "DySolverConstraintExtShared.h"
#include "DyConstraintPrep.h"
using namespace physx::Gu;
namespace physx
{
namespace Dy
{
// constraint-gen only, since these use getVelocity methods
// which aren't valid during the solver phase
//PX_INLINE void computeFrictionTangents(const Ps::aos::Vec3V& vrel,const Ps::aos::Vec3V& unitNormal, Ps::aos::Vec3V& t0, Ps::aos::Vec3V& t1)
//{
// using namespace Ps::aos;
// //PX_ASSERT(PxAbs(unitNormal.magnitude()-1)<1e-3f);
//
// t0 = V3Sub(vrel, V3Scale(unitNormal, V3Dot(unitNormal, vrel)));
// const FloatV ll = V3Dot(t0, t0);
//
// if (FAllGrtr(ll, FLoad(1e10f))) //can set as low as 0.
// {
// t0 = V3Scale(t0, FRsqrt(ll));
// t1 = V3Cross(unitNormal, t0);
// }
// else
// Ps::normalToTangents(unitNormal, t0, t1); //fallback
//}
PxReal SolverExtBody::projectVelocity(const PxVec3& linear, const PxVec3& angular) const
{
if(mLinkIndex == PxSolverConstraintDesc::NO_LINK)
return mBodyData->projectVelocity(linear, angular);
else
{
const Cm::SpatialVectorV velocity = mArticulation->getLinkVelocity(mLinkIndex);
const FloatV fv = velocity.dot(Cm::SpatialVector(linear, angular));
PxF32 f;
FStore(fv, &f);
return f;
/*PxF32 f;
FStore(getVelocity(*mFsData)[mLinkIndex].dot(Cm::SpatialVector(linear, angular)), &f);
return f;*/
}
}
Ps::aos::FloatV SolverExtBody::projectVelocity(const Ps::aos::Vec3V& linear, const Ps::aos::Vec3V& angular) const
{
if (mLinkIndex == PxSolverConstraintDesc::NO_LINK)
{
return V3SumElems(V3Add(V3Mul(V3LoadA(mBodyData->linearVelocity), linear), V3Mul(V3LoadA(mBodyData->angularVelocity), angular)));
}
else
{
const Cm::SpatialVectorV velocity = mArticulation->getLinkVelocity(mLinkIndex);
return velocity.dot(Cm::SpatialVectorV(linear, angular));
}
}
PxVec3 SolverExtBody::getLinVel() const
{
if(mLinkIndex == PxSolverConstraintDesc::NO_LINK)
return mBodyData->linearVelocity;
else
{
const Vec3V linear = mArticulation->getLinkVelocity(mLinkIndex).linear;
PxVec3 result;
V3StoreU(linear, result);
/*PxVec3 result;
V3StoreU(getVelocity(*mFsData)[mLinkIndex].linear, result);*/
return result;
}
}
PxVec3 SolverExtBody::getAngVel() const
{
if(mLinkIndex == PxSolverConstraintDesc::NO_LINK)
return mBodyData->angularVelocity;
else
{
const Vec3V angular = mArticulation->getLinkVelocity(mLinkIndex).angular;
PxVec3 result;
V3StoreU(angular, result);
/*PxVec3 result;
V3StoreU(getVelocity(*mFsData)[mLinkIndex].angular, result);*/
return result;
}
}
Ps::aos::Vec3V SolverExtBody::getLinVelV() const
{
if (mLinkIndex == PxSolverConstraintDesc::NO_LINK)
return V3LoadA(mBodyData->linearVelocity);
else
{
return mArticulation->getLinkVelocity(mLinkIndex).linear;
}
}
Vec3V SolverExtBody::getAngVelV() const
{
if (mLinkIndex == PxSolverConstraintDesc::NO_LINK)
return V3LoadA(mBodyData->angularVelocity);
else
{
return mArticulation->getLinkVelocity(mLinkIndex).angular;
}
}
Cm::SpatialVectorV SolverExtBody::getVelocity() const
{
if(mLinkIndex == PxSolverConstraintDesc::NO_LINK)
return Cm::SpatialVectorV(V3LoadA(mBodyData->linearVelocity), V3LoadA(mBodyData->angularVelocity));
else
return mArticulation->getLinkVelocity(mLinkIndex);
}
Cm::SpatialVector createImpulseResponseVector(const PxVec3& linear, const PxVec3& angular, const SolverExtBody& body)
{
if(body.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
return Cm::SpatialVector(linear, body.mBodyData->sqrtInvInertia * angular);
return Cm::SpatialVector(linear, angular);
}
Cm::SpatialVectorV createImpulseResponseVector(const Ps::aos::Vec3V& linear, const Ps::aos::Vec3V& angular, const SolverExtBody& body)
{
if (body.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
{
return Cm::SpatialVectorV(linear, M33MulV3(M33Load(body.mBodyData->sqrtInvInertia), angular));
}
return Cm::SpatialVectorV(linear, angular);
}
PxReal getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVector& impulse0, Cm::SpatialVector& deltaV0, PxReal dom0, PxReal angDom0,
const SolverExtBody& b1, const Cm::SpatialVector& impulse1, Cm::SpatialVector& deltaV1, PxReal dom1, PxReal angDom1,
Cm::SpatialVectorF* Z, bool allowSelfCollision)
{
PxReal response;
allowSelfCollision = false;
// right now self-collision with contacts crashes the solver
//KS - knocked this out to save some space on SPU
if(allowSelfCollision && b0.mLinkIndex!=PxSolverConstraintDesc::NO_LINK && b0.mArticulation == b1.mArticulation && 0)
{
/*ArticulationHelper::getImpulseSelfResponse(*b0.mFsData,b0.mLinkIndex, impulse0, deltaV0,
b1.mLinkIndex, impulse1, deltaV1);*/
b0.mArticulation->getImpulseSelfResponse(b0.mLinkIndex, b1.mLinkIndex, Z, impulse0.scale(dom0, angDom0), impulse1.scale(dom1, angDom1),
deltaV0, deltaV1);
response = impulse0.dot(deltaV0) + impulse1.dot(deltaV1);
}
else
{
if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
{
deltaV0.linear = impulse0.linear * b0.mBodyData->invMass * dom0;
deltaV0.angular = impulse0.angular * angDom0;
}
else
{
b0.mArticulation->getImpulseResponse(b0.mLinkIndex, Z, impulse0.scale(dom0, angDom0), deltaV0);
}
response = impulse0.dot(deltaV0);
if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
{
deltaV1.linear = impulse1.linear * b1.mBodyData->invMass * dom1;
deltaV1.angular = impulse1.angular * angDom1;
}
else
{
b1.mArticulation->getImpulseResponse( b1.mLinkIndex, Z, impulse1.scale(dom1, angDom1), deltaV1);
}
response += impulse1.dot(deltaV1);
}
return response;
}
FloatV getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVectorV& impulse0, Cm::SpatialVectorV& deltaV0, const FloatV& dom0, const FloatV& angDom0,
const SolverExtBody& b1, const Cm::SpatialVectorV& impulse1, Cm::SpatialVectorV& deltaV1, const FloatV& dom1, const FloatV& angDom1,
Cm::SpatialVectorV* Z, bool /*allowSelfCollision*/)
{
Vec3V response;
{
if (b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
{
deltaV0.linear = V3Scale(impulse0.linear, FMul(FLoad(b0.mBodyData->invMass), dom0));
deltaV0.angular = V3Scale(impulse0.angular, angDom0);
}
else
{
b0.mArticulation->getImpulseResponse(b0.mLinkIndex, Z, impulse0.scale(dom0, angDom0), deltaV0);
}
response = V3Add(V3Mul(impulse0.linear, deltaV0.linear), V3Mul(impulse0.angular, deltaV0.angular));
if (b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
{
deltaV1.linear = V3Scale(impulse1.linear, FMul(FLoad(b1.mBodyData->invMass), dom1));
deltaV1.angular = V3Scale(impulse1.angular, angDom1);
}
else
{
b1.mArticulation->getImpulseResponse(b1.mLinkIndex, Z, impulse1.scale(dom1, angDom1), deltaV1);
}
response = V3Add(response, V3Add(V3Mul(impulse1.linear, deltaV1.linear), V3Mul(impulse1.angular, deltaV1.angular)));
}
return V3SumElems(response);
}
void setupFinalizeExtSolverContacts(
const ContactPoint* buffer,
const CorrelationBuffer& c,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxU8* workspace,
const SolverExtBody& b0,
const SolverExtBody& b1,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal invMassScale0, PxReal invInertiaScale0,
PxReal invMassScale1, PxReal invInertiaScale1,
const PxReal restDist,
PxU8* frictionDataPtr,
PxReal ccdMaxContactDist,
Cm::SpatialVectorF* Z)
{
// NOTE II: the friction patches are sparse (some of them have no contact patches, and
// therefore did not get written back to the cache) but the patch addresses are dense,
// corresponding to valid patches
/*const bool haveFriction = PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;*/
const FloatV ccdMaxSeparation = FLoad(ccdMaxContactDist);
PxU8* PX_RESTRICT ptr = workspace;
const FloatV zero = FZero();
const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : b0.mArticulation->getLinkMaxPenBias(b0.mLinkIndex);
const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : b1.mArticulation->getLinkMaxPenBias(b1.mLinkIndex);
const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1));
const Vec3V frame0p = V3LoadU(bodyFrame0.p);
const Vec3V frame1p = V3LoadU(bodyFrame1.p);
const Cm::SpatialVectorV vel0 = b0.getVelocity();
const Cm::SpatialVectorV vel1 = b1.getVelocity();
const FloatV d0 = FLoad(invMassScale0);
const FloatV d1 = FLoad(invMassScale1);
const FloatV angD0 = FLoad(invInertiaScale0);
const FloatV angD1 = FLoad(invInertiaScale1);
Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero();
staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, d0);
staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, d1);
const FloatV restDistance = FLoad(restDist);
PxU32 frictionPatchWritebackAddrIndex = 0;
PxU32 contactWritebackCount = 0;
Ps::prefetchLine(c.contactID);
Ps::prefetchLine(c.contactID, 128);
const FloatV invDt = FLoad(invDtF32);
const FloatV p8 = FLoad(0.8f);
const FloatV bounceThreshold = FLoad(bounceThresholdF32);
const FloatV invDtp8 = FMul(invDt, p8);
PxU8 flags = 0;
for(PxU32 i=0;i<c.frictionPatchCount;i++)
{
PxU32 contactCount = c.frictionPatchContactCounts[i];
if(contactCount == 0)
continue;
const FrictionPatch& frictionPatch = c.frictionPatches[i];
PX_ASSERT(frictionPatch.anchorCount <= 2); //0==anchorCount is allowed if all the contacts in the manifold have a large offset.
const Gu::ContactPoint* contactBase0 = buffer + c.contactPatches[c.correlationListHeads[i]].start;
const PxReal combinedRestitution = contactBase0->restitution;
const PxReal coefficient = (contactBase0->materialFlags & PxMaterialFlag::eIMPROVED_PATCH_FRICTION && frictionPatch.anchorCount == 2) ? 0.5f : 1.f;
const PxReal staticFriction = contactBase0->staticFriction * coefficient;
const PxReal dynamicFriction = contactBase0->dynamicFriction * coefficient;
const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction));
staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction));
SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr);
ptr += sizeof(SolverContactHeader);
Ps::prefetchLine(ptr + 128);
Ps::prefetchLine(ptr + 256);
Ps::prefetchLine(ptr + 384);
const bool haveFriction = (disableStrongFriction == 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;
header->numNormalConstr = Ps::to8(contactCount);
header->numFrictionConstr = Ps::to8(haveFriction ? frictionPatch.anchorCount*2 : 0);
header->type = Ps::to8(DY_SC_TYPE_EXT_CONTACT);
header->flags = flags;
const FloatV restitution = FLoad(combinedRestitution);
header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;
header->angDom0 = invInertiaScale0;
header->angDom1 = invInertiaScale1;
const PxU32 pointStride = sizeof(SolverContactPointExt);
const PxU32 frictionStride = sizeof(SolverContactFrictionExt);
const Vec3V normal = V3LoadU(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
FloatV accumImpulse = FZero();
for(PxU32 patch=c.correlationListHeads[i];
patch!=CorrelationBuffer::LIST_END;
patch = c.contactPatches[patch].next)
{
const PxU32 count = c.contactPatches[patch].count;
const Gu::ContactPoint* contactBase = buffer + c.contactPatches[patch].start;
PxU8* p = ptr;
for(PxU32 j=0;j<count;j++)
{
const Gu::ContactPoint& contact = contactBase[j];
SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p);
p += pointStride;
accumImpulse = FAdd(accumImpulse, setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, frame0p, frame1p, normal, invDt, invDtp8, restDistance, maxPenBias, restitution,
bounceThreshold, contact, *solverContact, ccdMaxSeparation, Z, vel0, vel1));
}
ptr = p;
}
contactWritebackCount += contactCount;
accumImpulse = FDiv(accumImpulse, FLoad(PxF32(contactCount)));
header->normal_minAppliedImpulseForFrictionW = V4SetW(Vec4V_From_Vec3V(normal), accumImpulse);
PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr);
PxMemZero(forceBuffer, sizeof(PxF32) * contactCount);
ptr += sizeof(PxF32) * ((contactCount + 3) & (~3));
header->broken = 0;
if(haveFriction)
{
//const Vec3V normal = Vec3V_From_PxVec3(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
//Vec3V normalS = V3LoadA(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
const Vec3V linVrel = V3Sub(vel0.linear, vel1.linear);
//const Vec3V normal = Vec3V_From_PxVec3_Aligned(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
const FloatV orthoThreshold = FLoad(0.70710678f);
const FloatV p1 = FLoad(0.0001f);
// fallback: normal.cross((1,0,0)) or normal.cross((0,0,1))
const FloatV normalX = V3GetX(normal);
const FloatV normalY = V3GetY(normal);
const FloatV normalZ = V3GetZ(normal);
Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY);
Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero);
Vec3V t0Fallback = V3Sel(FIsGrtr(orthoThreshold, FAbs(normalX)), t0Fallback1, t0Fallback2);
Vec3V t0 = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel)));
t0 = V3Sel(FIsGrtr(V3LengthSq(t0), p1), t0, t0Fallback);
t0 = V3Normalize(t0);
const VecCrossV t0Cross = V3PrepareCross(t0);
const Vec3V t1 = V3Cross(normal, t0Cross);
const VecCrossV t1Cross = V3PrepareCross(t1);
//We want to set the writeBack ptr to point to the broken flag of the friction patch.
//On spu we have a slight problem here because the friction patch array is
//in local store rather than in main memory. The good news is that the address of the friction
//patch array in main memory is stored in the work unit. These two addresses will be equal
//except on spu where one is local store memory and the other is the effective address in main memory.
//Using the value stored in the work unit guarantees that the main memory address is used on all platforms.
PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch);
header->frictionBrokenWritebackByte = writeback;
for(PxU32 j = 0; j < frictionPatch.anchorCount; j++)
{
SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(ptr);
ptr += frictionStride;
SolverContactFrictionExt* PX_RESTRICT f1 = reinterpret_cast<SolverContactFrictionExt*>(ptr);
ptr += frictionStride;
Vec3V ra = V3LoadU(bodyFrame0.q.rotate(frictionPatch.body0Anchors[j]));
Vec3V rb = V3LoadU(bodyFrame1.q.rotate(frictionPatch.body1Anchors[j]));
Vec3V error = V3Sub(V3Add(ra, frame0p), V3Add(rb, frame1p));
{
const Vec3V raXn = V3Cross(ra, t0Cross);
const Vec3V rbXn = V3Cross(rb, t0Cross);
Cm::SpatialVectorV deltaV0, deltaV1;
const Cm::SpatialVectorV resp0 = createImpulseResponseVector(t0, raXn, b0);
const Cm::SpatialVectorV resp1 = createImpulseResponseVector(V3Neg(t0), V3Neg(rbXn), b1);
FloatV resp = getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
b1, resp1, deltaV1, d1, angD1, reinterpret_cast<Cm::SpatialVectorV*>(Z));
const FloatV velMultiplier = FSel(FIsGrtr(resp, FLoad(DY_ARTICULATION_MIN_RESPONSE)), FDiv(p8, resp), zero);
const PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
FloatV targetVel = V3Dot(V3LoadA(buffer[index].targetVel), t0);
if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
targetVel = FSub(targetVel, b0.projectVelocity(t0, raXn));
else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
targetVel = FAdd(targetVel, b1.projectVelocity(t0, rbXn));
f0->normalXYZ_appliedForceW = V4SetW(t0, zero);
f0->raXnXYZ_velMultiplierW = V4SetW(Vec4V_From_Vec3V(resp0.angular), velMultiplier);
f0->rbXnXYZ_biasW = V4SetW(V4Neg(Vec4V_From_Vec3V(resp1.angular)), FMul(V3Dot(t0, error), invDt));
f0->linDeltaVA = deltaV0.linear;
f0->angDeltaVA = deltaV0.angular;
f0->linDeltaVB = deltaV1.linear;
f0->angDeltaVB = deltaV1.angular;
FStore(targetVel, &f0->targetVel);
}
{
const Vec3V raXn = V3Cross(ra, t1Cross);
const Vec3V rbXn = V3Cross(rb, t1Cross);
Cm::SpatialVectorV deltaV0, deltaV1;
const Cm::SpatialVectorV resp0 = createImpulseResponseVector(t1, raXn, b0);
const Cm::SpatialVectorV resp1 = createImpulseResponseVector(V3Neg(t1), V3Neg(rbXn), b1);
FloatV resp = getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
b1, resp1, deltaV1, d1, angD1, reinterpret_cast<Cm::SpatialVectorV*>(Z));
//const FloatV velMultiplier = FSel(FIsGrtr(resp, FLoad(DY_ARTICULATION_MIN_RESPONSE)), FMul(p8, FRecip(resp)), zero);
const FloatV velMultiplier = FSel(FIsGrtr(resp, FLoad(DY_ARTICULATION_MIN_RESPONSE)), FDiv(p8, resp), zero);
const PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
FloatV targetVel = V3Dot(V3LoadA(buffer[index].targetVel), t1);
if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
targetVel = FSub(targetVel, b0.projectVelocity(t1, raXn));
else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
targetVel = FAdd(targetVel, b1.projectVelocity(t1, rbXn));
f1->normalXYZ_appliedForceW = V4SetW(t1, zero);
f1->raXnXYZ_velMultiplierW = V4SetW(Vec4V_From_Vec3V(resp0.angular), velMultiplier);
f1->rbXnXYZ_biasW = V4SetW(V4Neg(Vec4V_From_Vec3V(resp1.angular)), FMul(V3Dot(t1, error), invDt));
f1->linDeltaVA = deltaV0.linear;
f1->angDeltaVA = deltaV0.angular;
f1->linDeltaVB = deltaV1.linear;
f1->angDeltaVB = deltaV1.angular;
FStore(targetVel, &f1->targetVel);
}
}
}
frictionPatchWritebackAddrIndex++;
}
}
}
}

View File

@ -0,0 +1,103 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONSTRAINTEXT_H
#define DY_SOLVERCONSTRAINTEXT_H
#include "DySolverExt.h"
#include "PsVecMath.h"
namespace physx
{
struct PxcNpWorkUnit;
namespace Gu
{
class ContactBuffer;
struct ContactPoint;
}
namespace Dy
{
struct CorrelationBuffer;
PxReal getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVector& impulse0, Cm::SpatialVector& deltaV0, PxReal dom0, PxReal angDom0,
const SolverExtBody& b1, const Cm::SpatialVector& impulse1, Cm::SpatialVector& deltaV1, PxReal dom1, PxReal angDom1,
Cm::SpatialVectorF* Z, bool allowSelfCollision = false);
Ps::aos::FloatV getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVectorV& impulse0, Cm::SpatialVectorV& deltaV0, const Ps::aos::FloatV& dom0, const Ps::aos::FloatV& angDom0,
const SolverExtBody& b1, const Cm::SpatialVectorV& impulse1, Cm::SpatialVectorV& deltaV1, const Ps::aos::FloatV& dom1, const Ps::aos::FloatV& angDom1,
Cm::SpatialVectorV* Z, bool allowSelfCollision = false);
Cm::SpatialVector createImpulseResponseVector(const PxVec3& linear, const PxVec3& angular, const SolverExtBody& body);
Cm::SpatialVectorV createImpulseResponseVector(const Ps::aos::Vec3V& linear, const Ps::aos::Vec3V& angular, const SolverExtBody& body);
void setupFinalizeExtSolverContacts(
const Gu::ContactPoint* buffer,
const CorrelationBuffer& c,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxU8* workspace,
const SolverExtBody& b0,
const SolverExtBody& b1,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal invMassScale0, PxReal invInertiaScale0,
PxReal invMassScale1, PxReal invInertiaScale1,
PxReal restDistance, PxU8* frictionDataPtr,
PxReal ccdMaxContactDist,
Cm::SpatialVectorF* Z);
bool setupFinalizeExtSolverContactsCoulomb(
const Gu::ContactBuffer& buffer,
const CorrelationBuffer& c,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxU8* workspace,
PxReal invDt,
PxReal bounceThreshold,
const SolverExtBody& b0,
const SolverExtBody& b1,
PxU32 frictionCountPerPoint,
PxReal invMassScale0, PxReal invInertiaScale0,
PxReal invMassScale1, PxReal invInertiaScale1,
PxReal restDist,
PxReal ccdMaxContactDist,
Cm::SpatialVectorF* Z);
} //namespace Dy
}
#endif //DY_SOLVERCONSTRAINTEXT_H

View File

@ -0,0 +1,310 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"
#include "DyArticulationContactPrep.h"
#include "DySolverConstraintDesc.h"
#include "DySolverConstraint1D.h"
#include "DySolverContact.h"
#include "DySolverContactPF.h"
#include "DyArticulationHelper.h"
#include "PxcNpWorkUnit.h"
#include "PxsMaterialManager.h"
#include "PxsMaterialCombiner.h"
#include "DyCorrelationBuffer.h"
#include "DySolverConstraintExtShared.h"
using namespace physx;
using namespace Gu;
// constraint-gen only, since these use getVelocityFast methods
// which aren't valid during the solver phase
namespace physx
{
namespace Dy
{
bool setupFinalizeExtSolverContactsCoulomb(
const ContactBuffer& buffer,
const CorrelationBuffer& c,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxU8* workspace,
PxReal invDt,
PxReal bounceThresholdF32,
const SolverExtBody& b0,
const SolverExtBody& b1,
PxU32 frictionCountPerPoint,
PxReal invMassScale0, PxReal invInertiaScale0,
PxReal invMassScale1, PxReal invInertiaScale1,
PxReal restDist,
PxReal ccdMaxDistance,
Cm::SpatialVectorF* Z)
{
// NOTE II: the friction patches are sparse (some of them have no contact patches, and
// therefore did not get written back to the cache) but the patch addresses are dense,
// corresponding to valid patches
const FloatV ccdMaxSeparation = FLoad(ccdMaxDistance);
PxU8* PX_RESTRICT ptr = workspace;
const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : b0.mArticulation->getLinkMaxPenBias(b0.mLinkIndex);
const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : b1.mArticulation->getLinkMaxPenBias(b1.mLinkIndex);
const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1)/invDt);
const FloatV restDistance = FLoad(restDist);
const FloatV bounceThreshold = FLoad(bounceThresholdF32);
const Cm::SpatialVectorV vel0 = b0.getVelocity();
const Cm::SpatialVectorV vel1 = b1.getVelocity();
const FloatV invDtV = FLoad(invDt);
const FloatV pt8 = FLoad(0.8f);
const FloatV invDtp8 = FMul(invDtV, pt8);
const Vec3V bodyFrame0p = V3LoadU(bodyFrame0.p);
const Vec3V bodyFrame1p = V3LoadU(bodyFrame1.p);
Ps::prefetchLine(c.contactID);
Ps::prefetchLine(c.contactID, 128);
const PxU32 frictionPatchCount = c.frictionPatchCount;
const PxU32 pointStride = sizeof(SolverContactPointExt);
const PxU32 frictionStride = sizeof(SolverContactFrictionExt);
const PxU8 pointHeaderType = DY_SC_TYPE_EXT_CONTACT;
const PxU8 frictionHeaderType = DY_SC_TYPE_EXT_FRICTION;
const FloatV d0 = FLoad(invMassScale0);
const FloatV d1 = FLoad(invMassScale1);
const FloatV angD0 = FLoad(invInertiaScale0);
const FloatV angD1 = FLoad(invInertiaScale1);
PxU8 flags = 0;
for(PxU32 i=0;i< frictionPatchCount;i++)
{
const PxU32 contactCount = c.frictionPatchContactCounts[i];
if(contactCount == 0)
continue;
const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;
const Vec3V normalV = Ps::aos::V3LoadA(contactBase0->normal);
const Vec3V normal = V3LoadA(contactBase0->normal);
const PxReal combinedRestitution = contactBase0->restitution;
SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(ptr);
ptr += sizeof(SolverContactCoulombHeader);
Ps::prefetchLine(ptr, 128);
Ps::prefetchLine(ptr, 256);
Ps::prefetchLine(ptr, 384);
const FloatV restitution = FLoad(combinedRestitution);
header->numNormalConstr = PxU8(contactCount);
header->type = pointHeaderType;
//header->setRestitution(combinedRestitution);
header->setDominance0(d0);
header->setDominance1(d1);
header->angDom0 = invInertiaScale0;
header->angDom1 = invInertiaScale1;
header->flags = flags;
header->setNormal(normalV);
for(PxU32 patch=c.correlationListHeads[i];
patch!=CorrelationBuffer::LIST_END;
patch = c.contactPatches[patch].next)
{
const PxU32 count = c.contactPatches[patch].count;
const Gu::ContactPoint* contactBase = buffer.contacts + c.contactPatches[patch].start;
PxU8* p = ptr;
for(PxU32 j=0;j<count;j++)
{
const Gu::ContactPoint& contact = contactBase[j];
SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p);
p += pointStride;
setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0p, bodyFrame1p, normal, invDtV, invDtp8, restDistance, maxPenBias, restitution,
bounceThreshold, contact, *solverContact, ccdMaxSeparation, Z, vel0, vel1);
}
ptr = p;
}
}
//construct all the frictions
PxU8* PX_RESTRICT ptr2 = workspace;
const PxF32 orthoThreshold = 0.70710678f;
const PxF32 eps = 0.00001f;
bool hasFriction = false;
for(PxU32 i=0;i< frictionPatchCount;i++)
{
const PxU32 contactCount = c.frictionPatchContactCounts[i];
if(contactCount == 0)
continue;
SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(ptr2);
header->frictionOffset = PxU16(ptr - ptr2);
ptr2 += sizeof(SolverContactCoulombHeader) + header->numNormalConstr * pointStride;
const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;
PxVec3 normal = contactBase0->normal;
const PxReal staticFriction = contactBase0->staticFriction;
const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
const bool haveFriction = (disableStrongFriction == 0);
SolverFrictionHeader* frictionHeader = reinterpret_cast<SolverFrictionHeader*>(ptr);
frictionHeader->numNormalConstr = Ps::to8(c.frictionPatchContactCounts[i]);
frictionHeader->numFrictionConstr = Ps::to8(haveFriction ? c.frictionPatchContactCounts[i] * frictionCountPerPoint : 0);
frictionHeader->flags = flags;
ptr += sizeof(SolverFrictionHeader);
PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr);
ptr += frictionHeader->getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]);
PxMemZero(forceBuffer, sizeof(PxF32) * c.frictionPatchContactCounts[i]);
Ps::prefetchLine(ptr, 128);
Ps::prefetchLine(ptr, 256);
Ps::prefetchLine(ptr, 384);
const PxVec3 t0Fallback1(0.f, -normal.z, normal.y);
const PxVec3 t0Fallback2(-normal.y, normal.x, 0.f) ;
const PxVec3 tFallback1 = orthoThreshold > PxAbs(normal.x) ? t0Fallback1 : t0Fallback2;
const PxVec3 vrel = b0.getLinVel() - b1.getLinVel();
const PxVec3 t0_ = vrel - normal * (normal.dot(vrel));
const PxReal sqDist = t0_.dot(t0_);
const PxVec3 tDir0 = (sqDist > eps ? t0_: tFallback1).getNormalized();
const PxVec3 tDir1 = tDir0.cross(normal);
PxVec3 tFallback[2] = {tDir0, tDir1};
PxU32 ind = 0;
if(haveFriction)
{
hasFriction = true;
frictionHeader->setStaticFriction(staticFriction);
frictionHeader->invMass0D0 = invMassScale0;
frictionHeader->invMass1D1 = invMassScale1;
frictionHeader->angDom0 = invInertiaScale0;
frictionHeader->angDom1 = invInertiaScale1;
frictionHeader->type = frictionHeaderType;
PxU32 totalPatchContactCount = 0;
for(PxU32 patch=c.correlationListHeads[i];
patch!=CorrelationBuffer::LIST_END;
patch = c.contactPatches[patch].next)
{
const PxU32 count = c.contactPatches[patch].count;
const PxU32 start = c.contactPatches[patch].start;
const Gu::ContactPoint* contactBase = buffer.contacts + start;
PxU8* p = ptr;
for(PxU32 j =0; j < count; j++)
{
const Gu::ContactPoint& contact = contactBase[j];
const Vec3V ra = V3Sub(V3LoadA(contact.point), bodyFrame0p);
const Vec3V rb = V3Sub(V3LoadA(contact.point), bodyFrame1p);
const Vec3V targetVel = V3LoadA(contact.targetVel);
const Vec3V pVRa = V3Add(vel0.linear, V3Cross(vel0.angular, ra));
const Vec3V pVRb = V3Add(vel1.linear, V3Cross(vel1.angular, rb));
//const PxVec3 vrel = pVRa - pVRb;
for(PxU32 k = 0; k < frictionCountPerPoint; ++k)
{
SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(p);
p += frictionStride;
Vec3V t0 = V3LoadU(tFallback[ind]);
ind = 1 - ind;
const Vec3V raXn = V3Cross(ra, t0);
const Vec3V rbXn = V3Cross(rb, t0);
Cm::SpatialVectorV deltaV0, deltaV1;
const Cm::SpatialVectorV resp0 = createImpulseResponseVector(t0, raXn, b0);
const Cm::SpatialVectorV resp1 = createImpulseResponseVector(V3Neg(t0), V3Neg(rbXn), b1);
FloatV unitResponse = getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
b1, resp1, deltaV1, d1, angD1, reinterpret_cast<Cm::SpatialVectorV*>(Z));
FloatV tv = V3Dot(targetVel, t0);
if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
tv = FAdd(tv, V3Dot(pVRa, t0));
else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
tv = FSub(tv, V3Dot(pVRb, t0));
const FloatV recipResponse = FSel(FIsGrtr(unitResponse, FZero()), FRecip(unitResponse), FZero());
f0->raXnXYZ_velMultiplierW = V4SetW(Vec4V_From_Vec3V(resp0.angular), recipResponse);
f0->rbXnXYZ_biasW = V4SetW(V4Neg(Vec4V_From_Vec3V(resp1.angular)), FZero());
f0->normalXYZ_appliedForceW = V4SetW(Vec4V_From_Vec3V(t0), FZero());
FStore(tv, &f0->targetVel);
f0->linDeltaVA = deltaV0.linear;
f0->angDeltaVA = deltaV0.angular;
f0->linDeltaVB = deltaV1.linear;
f0->angDeltaVB = deltaV1.angular;
}
}
totalPatchContactCount += c.contactPatches[patch].count;
ptr = p;
}
}
}
//PX_ASSERT(ptr - workspace == n.solverConstraintSize);
return hasFriction;
}
}
}

View File

@ -0,0 +1,262 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_ARTICULATION_DEBUG_FNS_H
#define DY_ARTICULATION_DEBUG_FNS_H
#include "DyArticulationFnsScalar.h"
#include "DyArticulationFnsSimd.h"
namespace physx
{
namespace Dy
{
#if 0
void printMomentum(const char* id, PxTransform* pose, Cm::SpatialVector* velocity, FsInertia* inertia, PxU32 linkCount)
{
typedef ArticulationFnsScalar Fns;
Cm::SpatialVector m = Cm::SpatialVector::zero();
for(PxU32 i=0;i<linkCount;i++)
m += Fns::translateForce(pose[i].p - pose[0].p, Fns::multiply(inertia[i], velocity[i]));
printf("momentum (%20s): (%f, %f, %f), (%f, %f, %f)\n", id, m.linear.x, m.linear.y, m.linear.z, m.angular.x, m.angular.y, m.angular.z);
}
#endif
class ArticulationFnsDebug
{
typedef ArticulationFnsSimdBase SimdBase;
typedef ArticulationFnsSimd<ArticulationFnsDebug> Simd;
typedef ArticulationFnsScalar Scalar;
public:
static PX_FORCE_INLINE FsInertia addInertia(const FsInertia& in1, const FsInertia& in2)
{
return FsInertia(M33Add(in1.ll, in2.ll),
M33Add(in1.la, in2.la),
M33Add(in1.aa, in2.aa));
}
static PX_FORCE_INLINE FsInertia subtractInertia(const FsInertia& in1, const FsInertia& in2)
{
return FsInertia(M33Sub(in1.ll, in2.ll),
M33Sub(in1.la, in2.la),
M33Sub(in1.aa, in2.aa));
}
static Mat33V invertSym33(const Mat33V &m)
{
PxMat33 n_ = Scalar::invertSym33(unsimdify(m));
Mat33V n = SimdBase::invertSym33(m);
compare33(n_, unsimdify(n));
return n;
}
static Mat33V invSqrt(const Mat33V &m)
{
PxMat33 n_ = Scalar::invSqrt(unsimdify(m));
Mat33V n = SimdBase::invSqrt(m);
compare33(n_, unsimdify(n));
return n;
}
static FsInertia invertInertia(const FsInertia &I)
{
SpInertia J_ = Scalar::invertInertia(unsimdify(I));
FsInertia J = SimdBase::invertInertia(I);
compareInertias(J_,unsimdify(J));
return J;
}
static Mat33V computeSIS(const FsInertia &I, const Cm::SpatialVectorV S[3], Cm::SpatialVectorV*PX_RESTRICT IS)
{
Cm::SpatialVector IS_[3];
Scalar::multiply(IS_, unsimdify(I), unsimdify(&S[0]));
PxMat33 D_ = Scalar::multiplySym(IS_, unsimdify(&S[0]));
Mat33V D = SimdBase::computeSIS(I, S, IS);
compare33(unsimdify(D), D_);
return D;
}
static FsInertia multiplySubtract(const FsInertia &I, const Mat33V &D, const Cm::SpatialVectorV IS[3], Cm::SpatialVectorV*PX_RESTRICT DSI)
{
Cm::SpatialVector DSI_[3];
Scalar::multiply(DSI_, unsimdify(IS), unsimdify(D));
SpInertia J_ = Scalar::multiplySubtract(unsimdify(I), DSI_, unsimdify(IS));
FsInertia J = SimdBase::multiplySubtract(I, D, IS, DSI);
compareInertias(unsimdify(J), J_);
return J;
}
static FsInertia multiplySubtract(const FsInertia &I, const Cm::SpatialVectorV S[3])
{
SpInertia J_ = Scalar::multiplySubtract(unsimdify(I), unsimdify(S), unsimdify(S));
FsInertia J = SimdBase::multiplySubtract(I, S);
compareInertias(unsimdify(J), J_);
return J;
}
static FsInertia translateInertia(Vec3V offset, const FsInertia &I)
{
PxVec3 offset_;
V3StoreU(offset, offset_);
SpInertia J_ = Scalar::translate(offset_, unsimdify(I));
FsInertia J = SimdBase::translateInertia(offset, I);
compareInertias(J_, unsimdify(J));
return J;
}
static PX_FORCE_INLINE FsInertia propagate(const FsInertia &I,
const Cm::SpatialVectorV S[3],
const Mat33V &load,
const FloatV isf)
{
SpInertia J_ = Scalar::propagate(unsimdify(I), unsimdify(&S[0]), unsimdify(load), unsimdify(isf));
FsInertia J = Simd::propagate(I, S, load, isf);
compareInertias(J_, unsimdify(J));
return J;
}
static PX_FORCE_INLINE Mat33V computeDriveInertia(const FsInertia &I0,
const FsInertia &I1,
const Cm::SpatialVectorV S[3])
{
PxMat33 m_ = Scalar::computeDriveInertia(unsimdify(I0), unsimdify(I1), unsimdify(&S[0]));
Mat33V m = Simd::computeDriveInertia(I0, I1, S);
compare33(m_, unsimdify(m));
return m;
}
static const PxMat33 unsimdify(const Mat33V &m)
{
PX_ALIGN(16, PxMat33) m_;
PxMat33_From_Mat33V(m, m_);
return m_;
}
static PxReal unsimdify(const FloatV &m)
{
PxF32 f;
FStore(m, &f);
return f;
}
static SpInertia unsimdify(const FsInertia &I)
{
return SpInertia (unsimdify(I.ll),
unsimdify(I.la),
unsimdify(I.aa));
}
static const Cm::SpatialVector* unsimdify(const Cm::SpatialVectorV *S)
{
return reinterpret_cast<const Cm::SpatialVector*>(S);
}
private:
static PxReal absmax(const PxVec3& n)
{
return PxMax(PxAbs(n.x), PxMax(PxAbs(n.y),PxAbs(n.z)));
}
static PxReal norm(const PxMat33& n)
{
return PxMax(absmax(n.column0), PxMax(absmax(n.column1), absmax(n.column2)));
}
static void compare33(const PxMat33& ref, const PxMat33& n)
{
PxReal errNorm = norm(ref-n);
PX_UNUSED(errNorm);
PX_ASSERT(errNorm <= PxMax(norm(ref)*1e-3f, 1e-4f));
}
static void compareInertias(const SpInertia& a, const SpInertia& b)
{
compare33(a.mLL, b.mLL);
compare33(a.mLA, b.mLA);
compare33(a.mAA, b.mAA);
}
};
#if DY_ARTICULATION_DEBUG_VERIFY
static bool isPositiveDefinite(const Mat33V& m)
{
PX_ALIGN_PREFIX(16) PxMat33 m1 PX_ALIGN_SUFFIX(16);
PxMat33_From_Mat33V(m, m1);
return isPositiveDefinite(m1);
}
static bool isPositiveDefinite(const FsInertia& s)
{
return isPositiveDefinite(ArticulationFnsDebug::unsimdify(s));
}
static PxReal magnitude(const Cm::SpatialVectorV &v)
{
return PxSqrt(FStore(V3Dot(v.linear, v.linear)) + FStore(V3Dot(v.angular, v.angular)));
}
static bool almostEqual(const Cm::SpatialVectorV &ref, const Cm::SpatialVectorV& test, PxReal tolerance)
{
return magnitude(ref-test)<=tolerance*magnitude(ref);
}
#endif
}
}
#endif //DY_ARTICULATION_DEBUG_FNS_H

View File

@ -0,0 +1,397 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_ARTICULATION_SCALAR_FNS_H
#define DY_ARTICULATION_SCALAR_FNS_H
// Scalar helpers for articulations
#include "DyArticulationUtils.h"
#include "DyArticulationScalar.h"
#include "DySpatial.h"
namespace physx
{
namespace Dy
{
/*
namespace
{
static void print(const PxMat33 &m)
{
printf("(%f, %f, %f)\n(%f, %f, %f)\n(%f, %f, %f)\n\n",
m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]);
}
static void print(const Cm::SpatialVector *v, PxU32 count)
{
for(PxU32 i=0;i<count;i++)
{
printf("(%f, %f, %f), (%f, %f, %f)\n",
v[i].linear.x, v[i].linear.y, v[i].linear.z,
v[i].angular.x, v[i].angular.y, v[i].angular.z);
}
}
}
*/
class ArticulationDiagnostics
{
public:
static bool cholesky(const PxMat33& in, PxMat33& out)
{
out = in;
if(out[0][0]<=0)
return false;
out[0] /= PxSqrt(out[0][0]);
out[1] -= out[0][1]*out[0];
out[2] -= out[0][2]*out[0];
if(out[1][1]<=0)
return false;
out[1] /= PxSqrt(out[1][1]);
out[2] -= out[1][2]*out[1];
if(out[2][2]<=0)
return false;
out[2] /= PxSqrt(out[2][2]);
out[1][0] = out[2][0] = out[2][1] = 0;
return true;
}
static bool isSymmetric(const PxMat33&a)
{
return a[0][1] == a[1][0] && a[0][2] == a[2][0] && a[1][2] == a[2][1];
}
static bool isSymmetric(const Mat33V&a)
{
PxMat33 m;
PxMat33_From_Mat33V(a,m);
return isSymmetric(m);
}
static bool isSymmetric(const SpInertia&a)
{
return isSymmetric(a.mLL) && isSymmetric(a.mAA);
}
static bool isPositiveDefinite(const PxMat33& m)
{
PxMat33 _;
return cholesky(m, _);
}
static bool isPositiveDefinite(const SpInertia &s)
{
// compute
// (a 0)
// (b c)
PxMat33 a;
if(!cholesky(s.mLL, a))
return false;
PxMat33 bt = a.getInverse() * s.mLA;
PxMat33 x = s.mAA - bt.getTranspose()*bt;
PxMat33 c;
return cholesky(x, c);
}
};
class ArticulationFnsScalar
{
public:
static PX_FORCE_INLINE Cm::SpatialVector translateMotion(const PxVec3& p, const Cm::SpatialVector& v)
{
return Cm::SpatialVector(v.linear + p.cross(v.angular), v.angular);
}
// translate a force resolved at position p to the origin
static PX_FORCE_INLINE Cm::SpatialVector translateForce(const PxVec3& p, const Cm::SpatialVector& v)
{
return Cm::SpatialVector(v.linear, v.angular + p.cross(v.linear));
}
static PX_FORCE_INLINE PxMat33 invertSym33(const PxMat33& in)
{
PxVec3 v0 = in[1].cross(in[2]),
v1 = in[2].cross(in[0]),
v2 = in[0].cross(in[1]);
PxReal det = v0.dot(in[0]);
PX_ASSERT(det!=0);
PxReal recipDet = 1.0f/det;
return PxMat33(v0 * recipDet,
PxVec3(v0.y, v1.y, v1.z) * recipDet,
PxVec3(v0.z, v1.z, v2.z) * recipDet);
}
static PX_FORCE_INLINE SpInertia multiplySubtract(const SpInertia& I, const Cm::SpatialVector in0[3], const Cm::SpatialVector in1[3])
{
return I - SpInertia::dyad(in0[0], in1[0])
- SpInertia::dyad(in0[1], in1[1])
- SpInertia::dyad(in0[2], in1[2]);
}
static PX_FORCE_INLINE PxMat33 multiplySym(const Cm::SpatialVector* IS, const Cm::SpatialVector* S)
{
// return PxMat33(axisDot(IS, S[0]), axisDot(IS, S[1]), axisDot(IS, S[2]));
PxReal a00 = IS[0].dot(S[0]), a01 = IS[0].dot(S[1]), a02 = IS[0].dot(S[2]),
a11 = IS[1].dot(S[1]), a12 = IS[1].dot(S[2]),
a22 = IS[2].dot(S[2]);
return PxMat33(PxVec3(a00, a01, a02),
PxVec3(a01, a11, a12),
PxVec3(a02, a12, a22));
}
static PX_FORCE_INLINE void multiply(Cm::SpatialVector out[3], const SpInertia& I, const Cm::SpatialVector in[3])
{
out[0] = I * in[0];
out[1] = I * in[1];
out[2] = I * in[2];
}
static PX_FORCE_INLINE void multiply(Cm::SpatialVector out[3], const Cm::SpatialVector in[3], const PxMat33& D)
{
out[0] = axisMultiply(in, D[0]);
out[1] = axisMultiply(in, D[1]);
out[2] = axisMultiply(in, D[2]);
}
static PxMat33 invSqrt(const PxMat33 &m)
{
// cholesky factor to
// (a 0 0)
// (b c 0)
// (d e f)
// except that a,c,f are the reciprocal sqrts rather than sqrts
PxVec3 v0 = m.column0, v1 = m.column1, v2 = m.column2;
PxReal a = PxRecipSqrt(v0.x);
PxReal b = v0.y*a;
PxReal c = PxRecipSqrt(v1.y - b*b);
PxReal d = v0.z*a;
PxReal e = (v1.z-d*b) * c;
PxReal f = PxRecipSqrt(v2.z - d*d - e*e);
// invert
PxReal x = -b*a*c, y = (-e*x-d*a)*f, z = -e*c*f;
PxMat33 r(PxVec3(a, 0, 0 ),
PxVec3(x, c, 0 ),
PxVec3(y, z, f));
return r;
}
static PX_FORCE_INLINE PxMat33 computeSIS(const Cm::SpatialVector S[3], const SpInertia& I)
{
Cm::SpatialVector IS[3];
multiply(IS, I, S);
return multiplySym(IS, S);
}
// translate from COM-centered world-aligned inertia matrix to a displaced frame
static PX_INLINE SpInertia translate(const PxVec3& p, const SpInertia& i)
{
PxMat33 S = Ps::star(p), ST = S.getTranspose();
PxMat33 sla = S * i.mLA, llst = i.mLL * ST;
// return SpInertia(i.mLL, i.mLA + llst, i.mAA + sla + sla.getTranspose() + S * llst);
// this yields a symmetric result
PxMat33 t = sla+S*llst*0.5f;
return SpInertia(i.mLL, i.mLA + llst, i.mAA + (t+t.getTranspose())); }
static PX_FORCE_INLINE Cm::SpatialVector axisMultiply(const Cm::SpatialVector* a, const PxVec3& v)
{
return a[0]*v[0]+a[1]*v[1]+a[2]*v[2];
}
static PX_FORCE_INLINE PxVec3 axisDot(const Cm::SpatialVector* a, const Cm::SpatialVector& v)
{
return PxVec3(a[0].dot(v), a[1].dot(v), a[2].dot(v));
}
static PX_FORCE_INLINE SpInertia invertInertia(const SpInertia& I)
{
PxMat33 aa = I.mAA, ll = I.mLL, la = I.mLA;
aa = (aa + aa.getTranspose())*0.5f;
ll = (ll + ll.getTranspose())*0.5f;
PxMat33 AAInv = invertSym33(aa);
PxMat33 z = -la * AAInv;
PxMat33 S = ll + z * la.getTranspose(); // Schur complement of mAA
PxMat33 LL = invertSym33(S);
PxMat33 LA = LL * z;
PxMat33 AA = AAInv + z.getTranspose() * LA;
SpInertia result(LL, LA, AA);
return result;
}
static SpInertia propagate(const SpInertia& I,
const Cm::SpatialVector S[3],
const PxMat33& load,
PxReal isf)
{
Cm::SpatialVector IS[3], ISD[3];
multiply(IS, I, S);
PxMat33 SIS = multiplySym(S, IS);
// yields a symmetric result
PxMat33 D = invSqrt(SIS+load*isf);
multiply(ISD, IS, D);
return multiplySubtract(I, ISD, ISD);
}
static PxMat33 computeDriveInertia(const SpInertia& I0,
const SpInertia& I1,
const Cm::SpatialVector S[3])
{
// this could be a lot more efficient, especially since it can be combined with
// the inertia accumulation. Also it turns out to be symmetric in I0 and I1, which
// isn't obvious from the formulation, so it's likely there's a more efficient formulation
PxMat33 D = invertSym33(computeSIS(S,I0));
Cm::SpatialVector IS[3], ISD[3];
multiply(IS,I0,S);
multiply(ISD, IS, D);
SpInertia tot = multiplySubtract(I0+I1,ISD,IS);
SpInertia invTot = invertInertia(tot);
PxMat33 E = computeSIS(ISD,invTot);
PxMat33 load = invertSym33(E+D);
PX_ASSERT(load[0].isFinite() && load[1].isFinite() && load[2].isFinite());
PX_ASSERT(ArticulationDiagnostics::isSymmetric(load) && ArticulationDiagnostics::isPositiveDefinite(load));
return load;
}
static PX_INLINE Cm::SpatialVector propagateImpulse(const FsRow& row,
const FsJointVectors& jv,
PxVec3& SZ,
const Cm::SpatialVector& Z,
const FsRowAux& aux)
{
PX_UNUSED(aux);
SZ = Z.angular + Z.linear.cross(getJointOffset(jv));
Cm::SpatialVector result = translateForce(getParentOffset(jv), Z - axisMultiply(getDSI(row), SZ));
#if DY_ARTICULATION_DEBUG_VERIFY
PxVec3 SZcheck;
Cm::SpatialVector check = ArticulationRef::propagateImpulse(row, jv, SZcheck, Z, aux);
PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f));
PX_ASSERT((SZ-SZcheck).magnitude()<1e-5*PxMax(SZcheck.magnitude(), 1.0f));
#endif
return result;
}
static PX_INLINE Cm::SpatialVector propagateVelocity(const FsRow& row,
const FsJointVectors& jv,
const PxVec3& SZ,
const Cm::SpatialVector& v,
const FsRowAux& aux)
{
PX_UNUSED(aux);
Cm::SpatialVector w = translateMotion(-getParentOffset(jv), v);
PxVec3 DSZ = multiply(row.D, SZ);
PxVec3 n = axisDot(getDSI(row), w) + DSZ;
Cm::SpatialVector result = w - Cm::SpatialVector(getJointOffset(jv).cross(n),n);
#if DY_ARTICULATION_DEBUG_VERIFY
Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, SZ, v, aux);
PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f));
#endif
return result;
}
static PX_FORCE_INLINE PxVec3 multiply(const Mat33V& m, const PxVec3& v)
{
return reinterpret_cast<const PxVec3&>(m.col0) * v.x
+ reinterpret_cast<const PxVec3&>(m.col1) * v.y
+ reinterpret_cast<const PxVec3&>(m.col2) * v.z;
}
static PX_FORCE_INLINE PxVec3 multiplyTranspose(const Mat33V& m, const PxVec3& v)
{
return PxVec3(v.dot(reinterpret_cast<const PxVec3&>(m.col0)),
v.dot(reinterpret_cast<const PxVec3&>(m.col1)),
v.dot(reinterpret_cast<const PxVec3&>(m.col2)));
}
static Cm::SpatialVector multiply(const FsInertia& m, const Cm::SpatialVector& v)
{
return Cm::SpatialVector(multiply(m.ll,v.linear) + multiply(m.la,v.angular),
multiplyTranspose(m.la, v.linear) + multiply(m.aa, v.angular));
}
static PX_FORCE_INLINE Cm::SpatialVector getRootDeltaV(const FsData& matrix, const Cm::SpatialVector& Z)
{
return multiply(getRootInverseInertia(matrix), Z);
}
};
}
}
#endif //DY_ARTICULATION_SCALAR_FNS_H

View File

@ -0,0 +1,442 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_ARTICULATION_SIMD_FNS_H
#define DY_ARTICULATION_SIMD_FNS_H
#include "DyArticulationUtils.h"
namespace physx
{
namespace Dy
{
template <typename T, PxU32 count>
class PodULike
{
PxU8 space[sizeof(T)*count];
public:
PX_FORCE_INLINE operator T*() { return reinterpret_cast<T*>(space); }
};
#define POD_U_LIKE(_T, _count, _alignment) PX_ALIGN_PREFIX(_alignment) PodULike<_T, _count> PX_ALIGN_SUFFIX(_alignment)
class ArticulationFnsSimdBase
{
public:
static PX_FORCE_INLINE FsInertia addInertia(const FsInertia& in1, const FsInertia& in2)
{
return FsInertia(M33Add(in1.ll, in2.ll),
M33Add(in1.la, in2.la),
M33Add(in1.aa, in2.aa));
}
static PX_FORCE_INLINE FsInertia subtractInertia(const FsInertia& in1, const FsInertia& in2)
{
return FsInertia(M33Sub(in1.ll, in2.ll),
M33Sub(in1.la, in2.la),
M33Sub(in1.aa, in2.aa));
}
static PX_FORCE_INLINE Vec3V axisDot(const Cm::SpatialVectorV S[3], const Cm::SpatialVectorV &v)
{
return V3Merge(FAdd(V3Dot(S[0].linear,v.linear), V3Dot(S[0].angular,v.angular)),
FAdd(V3Dot(S[1].linear,v.linear), V3Dot(S[1].angular,v.angular)),
FAdd(V3Dot(S[2].linear,v.linear), V3Dot(S[2].angular,v.angular)));
}
static PX_FORCE_INLINE Cm::SpatialVectorV axisMultiply(const Cm::SpatialVectorV S[3], Vec3V v)
{
return Cm::SpatialVectorV(V3ScaleAdd(S[0].linear, V3GetX(v), V3ScaleAdd(S[1].linear, V3GetY(v), V3Scale(S[2].linear, V3GetZ(v)))),
V3ScaleAdd(S[0].angular, V3GetX(v), V3ScaleAdd(S[1].angular, V3GetY(v), V3Scale(S[2].angular, V3GetZ(v)))));
}
static PX_FORCE_INLINE Cm::SpatialVectorV subtract(const Cm::SpatialVectorV &a, const Cm::SpatialVectorV &b)
{
return Cm::SpatialVectorV(V3Sub(a.linear, b.linear), V3Sub(a.angular, b.angular));
}
static PX_FORCE_INLINE Cm::SpatialVectorV add(const Cm::SpatialVectorV &a, const Cm::SpatialVectorV &b)
{
return Cm::SpatialVectorV(V3Add(a.linear, b.linear), V3Add(a.angular, b.angular));
}
static PX_FORCE_INLINE Cm::SpatialVectorV multiply(const FsInertia &I, const Cm::SpatialVectorV &S)
{
return Cm::SpatialVectorV(V3Add(M33MulV3(I.ll,S.linear), M33MulV3(I.la,S.angular)),
V3Add(M33TrnspsMulV3(I.la,S.linear), M33MulV3(I.aa,S.angular)));
}
static PX_FORCE_INLINE Cm::SpatialVectorV translateMotion(const Vec3V& p, const Cm::SpatialVectorV& v)
{
return Cm::SpatialVectorV(V3Add(v.linear, V3Cross(p, v.angular)), v.angular);
}
// translate a force resolved at position p to the origin
static PX_FORCE_INLINE Cm::SpatialVectorV translateForce(const Vec3V& p, const Cm::SpatialVectorV& v)
{
return Cm::SpatialVectorV(v.linear, V3Add(v.angular, V3Cross(p, v.linear)));
}
static PX_FORCE_INLINE Mat33V invertSym33(const Mat33V &m)
{
Vec3V a0 = V3Cross(m.col1, m.col2);
Vec3V a1 = V3Cross(m.col2, m.col0);
Vec3V a2 = V3Cross(m.col0, m.col1);
FloatV det = V3Dot(a0, m.col0);
FloatV recipDet = FRecip(det);
a1 = V3SetX(a1, V3GetY(a0));
a2 = V3Merge(V3GetZ(a0), V3GetZ(a1), V3GetZ(a2)); // make sure it's symmetric
return Mat33V(V3Scale(a0, recipDet),
V3Scale(a1, recipDet),
V3Scale(a2, recipDet));
}
static PX_FORCE_INLINE FloatV safeInvSqrt(FloatV v)
{
return FSqrt(FMax(FZero(), FRecip(v)));
}
static PX_FORCE_INLINE Mat33V invSqrt(const Mat33V& m)
{
// cholesky factor to
// (a 0 0)
// (b c 0)
// (d e f)
// except that a,c,f are the reciprocal sqrts rather than sqrts
// PxVec3 v0 = m.column0, v1 = m.column1, v2 = m.column2;
Vec3V v0 = m.col0, v1 = m.col1, v2 = m.col2;
const FloatV x0 = V3GetX(v0), y1 = V3GetY(v1), z2 = V3GetZ(v2);
FloatV a = safeInvSqrt(x0); // PxReal a = PxRecipSqrt(v0.x);
Vec3V abd = V3Scale(v0, a); // PxReal b = v0.y*a;
FloatV b = V3GetY(abd);
FloatV c2 = FNegScaleSub(b, b, y1); // PxReal c = PxRecipSqrt(v1.y - b*b);
FloatV c = safeInvSqrt(c2);
FloatV d = V3GetZ(abd); // PxReal d = v0.z*a;
FloatV e = FMul(FNegScaleSub(b, d, V3GetZ(v1)), c); // PxReal e = (v1.z-d*b) * c;
FloatV f2 = FNegScaleSub(d, d, FNegScaleSub(e, e, z2)); // PxReal f = PxRecipSqrt(v2.z - d*d - e*e);
FloatV f = safeInvSqrt(f2);
// invert
FloatV x = FMul(FMul(b,a),c), // x = -b*a*c
y = FMul((FNegScaleSub(d,a, FMul(e,x))), f), // y = (-e*x-d*a)*f
z = FMul(e, FMul(c,f)); // z = -e*c*f
return Mat33V(V3Merge(a, FZero(), FZero()),
V3Merge(FNeg(x), c, FZero()),
V3Merge(y, FNeg(z), f));
}
static PX_FORCE_INLINE FsInertia invertInertia(const FsInertia &I)
{
Mat33V aa = M33Scale(M33Add(I.aa, M33Trnsps(I.aa)), FHalf());
Mat33V ll = M33Scale(M33Add(I.ll, M33Trnsps(I.ll)), FHalf());
Mat33V AAInv = invertSym33(aa);
Mat33V z = M33MulM33(M33Neg(I.la), AAInv);
Mat33V S = M33Add(ll, M33MulM33(z, M33Trnsps(I.la)));
Mat33V LL = invertSym33(S);
Mat33V LA = M33MulM33(LL, z);
Mat33V AA = M33Add(AAInv, M33MulM33(M33Trnsps(z), LA));
return FsInertia(LL, LA, AA);
}
static PX_NOINLINE Mat33V computeSIS(const FsInertia &I, const Cm::SpatialVectorV S[3], Cm::SpatialVectorV IS[3])
{
Vec3V S0l = S[0].linear, S0a = S[0].angular;
Vec3V S1l = S[1].linear, S1a = S[1].angular;
Vec3V S2l = S[2].linear, S2a = S[2].angular;
Vec3V IS0l = V3Add(M33MulV3(I.ll,S0l), M33MulV3(I.la,S0a));
Vec3V IS0a = V3Add(M33TrnspsMulV3(I.la,S0l), M33MulV3(I.aa,S0a));
Vec3V IS1l = V3Add(M33MulV3(I.ll,S1l), M33MulV3(I.la,S1a));
Vec3V IS1a = V3Add(M33TrnspsMulV3(I.la,S1l), M33MulV3(I.aa,S1a));
Vec3V IS2l = V3Add(M33MulV3(I.ll,S2l), M33MulV3(I.la,S2a));
Vec3V IS2a = V3Add(M33TrnspsMulV3(I.la,S2l), M33MulV3(I.aa,S2a));
// compute SIS
FloatV a00 = FAdd(V3Dot(S0l, IS0l), V3Dot(S0a, IS0a));
FloatV a01 = FAdd(V3Dot(S0l, IS1l), V3Dot(S0a, IS1a));
FloatV a02 = FAdd(V3Dot(S0l, IS2l), V3Dot(S0a, IS2a));
FloatV a11 = FAdd(V3Dot(S1l, IS1l), V3Dot(S1a, IS1a));
FloatV a12 = FAdd(V3Dot(S1l, IS2l), V3Dot(S1a, IS2a));
FloatV a22 = FAdd(V3Dot(S2l, IS2l), V3Dot(S2a, IS2a));
// write IS, a useful side-effect
IS[0].linear = IS0l; IS[0].angular = IS0a;
IS[1].linear = IS1l; IS[1].angular = IS1a;
IS[2].linear = IS2l; IS[2].angular = IS2a;
return Mat33V(V3Merge(a00, a01, a02),
V3Merge(a01, a11, a12),
V3Merge(a02, a12, a22));
}
static PX_FORCE_INLINE FsInertia multiplySubtract(const FsInertia &I, const Mat33V &D, const Cm::SpatialVectorV IS[3], Cm::SpatialVectorV DSI[3])
{
// cut'n'paste, how I love ya, how I love ya
Vec3V IS0l = IS[0].linear, IS0a = IS[0].angular;
Vec3V IS1l = IS[1].linear, IS1a = IS[1].angular;
Vec3V IS2l = IS[2].linear, IS2a = IS[2].angular;
Vec3V D0 = D.col0, D1 = D.col1, D2 = D.col2;
// compute IDS
Vec3V DSI0l = V3ScaleAdd(IS0l, V3GetX(D0), V3ScaleAdd(IS1l, V3GetY(D0), V3Scale(IS2l, V3GetZ(D0))));
Vec3V DSI1l = V3ScaleAdd(IS0l, V3GetX(D1), V3ScaleAdd(IS1l, V3GetY(D1), V3Scale(IS2l, V3GetZ(D1))));
Vec3V DSI2l = V3ScaleAdd(IS0l, V3GetX(D2), V3ScaleAdd(IS1l, V3GetY(D2), V3Scale(IS2l, V3GetZ(D2))));
Vec3V DSI0a = V3ScaleAdd(IS0a, V3GetX(D0), V3ScaleAdd(IS1a, V3GetY(D0), V3Scale(IS2a, V3GetZ(D0))));
Vec3V DSI1a = V3ScaleAdd(IS0a, V3GetX(D1), V3ScaleAdd(IS1a, V3GetY(D1), V3Scale(IS2a, V3GetZ(D1))));
Vec3V DSI2a = V3ScaleAdd(IS0a, V3GetX(D2), V3ScaleAdd(IS1a, V3GetY(D2), V3Scale(IS2a, V3GetZ(D2))));
// compute J = I - DSI' IS. Each row of DSI' IS generates an inertia dyad
Vec3V ll0 = I.ll.col0, ll1 = I.ll.col1, ll2 = I.ll.col2;
Vec3V la0 = I.la.col0, la1 = I.la.col1, la2 = I.la.col2;
Vec3V aa0 = I.aa.col0, aa1 = I.aa.col1, aa2 = I.aa.col2;
#define SUBTRACT_DYAD(_a, _b) \
ll0 = V3NegScaleSub(_b##l, V3GetX(_a##l), ll0); la0 = V3NegScaleSub(_b##l, V3GetX(_a##a), la0); aa0 = V3NegScaleSub(_b##a, V3GetX(_a##a), aa0); \
ll1 = V3NegScaleSub(_b##l, V3GetY(_a##l), ll1); la1 = V3NegScaleSub(_b##l, V3GetY(_a##a), la1); aa1 = V3NegScaleSub(_b##a, V3GetY(_a##a), aa1); \
ll2 = V3NegScaleSub(_b##l, V3GetZ(_a##l), ll2); la2 = V3NegScaleSub(_b##l, V3GetZ(_a##a), la2); aa2 = V3NegScaleSub(_b##a, V3GetZ(_a##a), aa2);
SUBTRACT_DYAD(IS0, DSI0);
SUBTRACT_DYAD(IS1, DSI1);
SUBTRACT_DYAD(IS2, DSI2);
#undef SUBTRACT_DYAD
DSI[0].linear = DSI0l; DSI[0].angular = DSI0a;
DSI[1].linear = DSI1l; DSI[1].angular = DSI1a;
DSI[2].linear = DSI2l; DSI[2].angular = DSI2a;
return FsInertia(Mat33V(ll0, ll1, ll2),
Mat33V(la0, la1, la2),
Mat33V(aa0, aa1, aa2));
}
static PX_FORCE_INLINE FsInertia multiplySubtract(const FsInertia &I, const Cm::SpatialVectorV S[3])
{
// cut'n'paste, how I love ya, how I love ya
const Vec3V S0l = S[0].linear, S0a = S[0].angular;
const Vec3V S1l = S[1].linear, S1a = S[1].angular;
const Vec3V S2l = S[2].linear, S2a = S[2].angular;
// compute J = I - DSI' IS. Each row of DSI' IS generates an inertia dyad
Vec3V ll0 = I.ll.col0, ll1 = I.ll.col1, ll2 = I.ll.col2;
Vec3V la0 = I.la.col0, la1 = I.la.col1, la2 = I.la.col2;
Vec3V aa0 = I.aa.col0, aa1 = I.aa.col1, aa2 = I.aa.col2;
#define SUBTRACT_DYAD(_a, _b) \
ll0 = V3NegScaleSub(_b##l, V3GetX(_a##l), ll0); la0 = V3NegScaleSub(_b##l, V3GetX(_a##a), la0); aa0 = V3NegScaleSub(_b##a, V3GetX(_a##a), aa0); \
ll1 = V3NegScaleSub(_b##l, V3GetY(_a##l), ll1); la1 = V3NegScaleSub(_b##l, V3GetY(_a##a), la1); aa1 = V3NegScaleSub(_b##a, V3GetY(_a##a), aa1); \
ll2 = V3NegScaleSub(_b##l, V3GetZ(_a##l), ll2); la2 = V3NegScaleSub(_b##l, V3GetZ(_a##a), la2); aa2 = V3NegScaleSub(_b##a, V3GetZ(_a##a), aa2);
SUBTRACT_DYAD(S0, S0);
SUBTRACT_DYAD(S1, S1);
SUBTRACT_DYAD(S2, S2);
#undef SUBTRACT_DYAD
return FsInertia(Mat33V(ll0, ll1, ll2),
Mat33V(la0, la1, la2),
Mat33V(aa0, aa1, aa2));
}
static PX_FORCE_INLINE FsInertia translateInertia(Vec3V a, const FsInertia &input)
{
Vec3V b = V3Neg(a);
Vec3V la0 = input.la.col0, la1 = input.la.col1, la2 = input.la.col2;
Vec3V ll0 = input.ll.col0, ll1 = input.ll.col1, ll2 = input.ll.col2;
Vec3V aa0 = input.aa.col0, aa1 = input.aa.col1, aa2 = input.aa.col2;
FloatV aX = V3GetX(a), aY = V3GetY(a), aZ = V3GetZ(a);
FloatV bX = V3GetX(b), bY = V3GetY(b), bZ = V3GetZ(b);
FloatV Z = FZero();
// s - star matrix of a
Vec3V s0 = V3Merge(Z, aZ, bY),
s1 = V3Merge(bZ, Z, aX),
s2 = V3Merge(aY, bX, Z);
// s * la
Vec3V sla0 = V3ScaleAdd(s0, V3GetX(la0), V3ScaleAdd(s1, V3GetY(la0), V3Scale(s2, V3GetZ(la0))));
Vec3V sla1 = V3ScaleAdd(s0, V3GetX(la1), V3ScaleAdd(s1, V3GetY(la1), V3Scale(s2, V3GetZ(la1))));
Vec3V sla2 = V3ScaleAdd(s0, V3GetX(la2), V3ScaleAdd(s1, V3GetY(la2), V3Scale(s2, V3GetZ(la2))));
// ll * s.transpose() (ll is symmetric)
Vec3V llst0 = V3ScaleAdd(ll2, aY, V3Scale(ll1, bZ)),
llst1 = V3ScaleAdd(ll0, aZ, V3Scale(ll2, bX)),
llst2 = V3ScaleAdd(ll1, aX, V3Scale(ll0, bY));
// t = sla+S*llst*0.5f;
Vec3V sllst0 = V3ScaleAdd(s2, V3GetZ(llst0), V3ScaleAdd(s1, V3GetY(llst0), V3Scale(s0, V3GetX(llst0))));
Vec3V sllst1 = V3ScaleAdd(s2, V3GetZ(llst1), V3ScaleAdd(s1, V3GetY(llst1), V3Scale(s0, V3GetX(llst1))));
Vec3V sllst2 = V3ScaleAdd(s2, V3GetZ(llst2), V3ScaleAdd(s1, V3GetY(llst2), V3Scale(s0, V3GetX(llst2))));
Vec3V t0 = V3ScaleAdd(sllst0, FHalf(), sla0);
Vec3V t1 = V3ScaleAdd(sllst1, FHalf(), sla1);
Vec3V t2 = V3ScaleAdd(sllst2, FHalf(), sla2);
// t+t.transpose()
Vec3V r0 = V3Add(t0, V3Merge(V3GetX(t0), V3GetX(t1), V3GetX(t2))),
r1 = V3Add(t1, V3Merge(V3GetY(t0), V3GetY(t1), V3GetY(t2))),
r2 = V3Add(t2, V3Merge(V3GetZ(t0), V3GetZ(t1), V3GetZ(t2)));
return FsInertia(Mat33V(ll0, ll1, ll2),
Mat33V(V3Add(la0, llst0),
V3Add(la1, llst1),
V3Add(la2, llst2)),
Mat33V(V3Add(aa0, r0),
V3Add(aa1, r1),
V3Add(aa2, r2)));
}
};
template<class Base>
class ArticulationFnsSimd : public Base
{
static PX_FORCE_INLINE void axisMultiplyLowerTriangular(Cm::SpatialVectorV ES[3], const Mat33V&E, const Cm::SpatialVectorV S[3])
{
const Vec3V l0 = S[0].linear, l1 = S[1].linear, l2 = S[2].linear;
const Vec3V a0 = S[0].angular, a1 = S[1].angular, a2 = S[2].angular;
ES[0] = Cm::SpatialVectorV(V3Scale(l0, V3GetX(E.col0)),
V3Scale(a0, V3GetX(E.col0)));
ES[1] = Cm::SpatialVectorV(V3ScaleAdd(l0, V3GetX(E.col1), V3Scale(l1, V3GetY(E.col1))),
V3ScaleAdd(a0, V3GetX(E.col1), V3Scale(a1, V3GetY(E.col1))));
ES[2] = Cm::SpatialVectorV(V3ScaleAdd(l0, V3GetX(E.col2), V3ScaleAdd(l1, V3GetY(E.col2), V3Scale(l2, V3GetZ(E.col2)))),
V3ScaleAdd(a0, V3GetX(E.col2), V3ScaleAdd(a1, V3GetY(E.col2), V3Scale(a2, V3GetZ(E.col2)))));
}
public:
static PX_FORCE_INLINE FsInertia propagate(const FsInertia &I,
const Cm::SpatialVectorV S[3],
const Mat33V &load,
const FloatV isf)
{
Cm::SpatialVectorV IS[3], ISE[3];
Mat33V D = Base::computeSIS(I, S, IS);
D.col0 = V3ScaleAdd(load.col0, isf, D.col0);
D.col1 = V3ScaleAdd(load.col1, isf, D.col1);
D.col2 = V3ScaleAdd(load.col2, isf, D.col2);
axisMultiplyLowerTriangular(ISE, Base::invSqrt(D), IS);
return Base::multiplySubtract(I, ISE);
}
static PX_INLINE Cm::SpatialVectorV propagateImpulse(const FsRow& row,
const FsJointVectors& jv,
Vec3V& SZ,
const Cm::SpatialVectorV& Z,
const FsRowAux& aux)
{
PX_UNUSED(aux);
SZ = V3Add(Z.angular, V3Cross(Z.linear, jv.jointOffset));
return Base::translateForce(jv.parentOffset, Z - Base::axisMultiply(row.DSI, SZ));
}
static PX_INLINE Cm::SpatialVectorV propagateVelocity(const FsRow& row,
const FsJointVectors& jv,
const Vec3V& SZ,
const Cm::SpatialVectorV& v,
const FsRowAux& aux)
{
PX_UNUSED(aux);
Cm::SpatialVectorV w = Base::translateMotion(V3Neg(jv.parentOffset), v);
Vec3V DSZ = M33MulV3(row.D, SZ);
Vec3V n = V3Add(Base::axisDot(row.DSI, w), DSZ);
return w - Cm::SpatialVectorV(V3Cross(jv.jointOffset, n), n);
}
static
#if !(PX_ANDROID && PX_DEBUG) // Not inlining for Android Debug to avoid "conditional branch out of range" compilation error on arm64-v8a ABI
PX_FORCE_INLINE
#endif
Mat33V computeDriveInertia(const FsInertia &I0,
const FsInertia &I1,
const Cm::SpatialVectorV S[3])
{
POD_U_LIKE(Cm::SpatialVectorV, 3, 16) IS, ISD, dummy;
Mat33V D = Base::computeSIS(I0, S, IS);
Mat33V DInv = Base::invertSym33(D);
FsInertia tmp = Base::addInertia(I0, I1);
tmp = Base::multiplySubtract(tmp, DInv, IS, ISD);
FsInertia J = Base::invertInertia(tmp);
Mat33V E = Base::computeSIS(J, ISD, dummy);
return Base::invertSym33(M33Add(DInv,E));
}
};
}
}
#endif //DY_ARTICULATION_SIMD_FNS_H

View File

@ -0,0 +1,500 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxVec3.h"
#include "foundation/PxMath.h"
#include "foundation/PxMemory.h"
#include "common/PxProfileZone.h"
#include "PsUtilities.h"
#include "CmSpatialVector.h"
#include "DyArticulationHelper.h"
#include "DyArticulationReference.h"
#include "DyArticulationFnsSimd.h"
#include "DyArticulationFnsScalar.h"
#include "DyArticulationFnsDebug.h"
#include "DySolverConstraintDesc.h"
#include "PxvDynamics.h"
#include "DyArticulation.h"
#include "CmConeLimitHelper.h"
#include "DySolverConstraint1D.h"
#include "PxcConstraintBlockStream.h"
#include "DySolverConstraint1D.h"
#include "DyArticulationPImpl.h"
#include "PsFoundation.h"
namespace physx
{
namespace Dy
{
void getImpulseResponseSlow(const FsData& matrix,
PxU32 linkID0, const Cm::SpatialVectorV& impulse0, Cm::SpatialVectorV& deltaV0,
PxU32 linkID1, const Cm::SpatialVectorV& impulse1, Cm::SpatialVectorV& deltaV1)
{
typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
const FsRow* rows = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
PxU32 stack[DY_ARTICULATION_MAX_SIZE];
Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
PxU32 i0, i1, ic;
for(i0 = linkID0, i1 = linkID1; i0!=i1;) // find common path
{
if(i0<i1)
i1 = matrix.parent[i1];
else
i0 = matrix.parent[i0];
}
PxU32 common = i0;
Cm::SpatialVectorV Z0 = -impulse0, Z1 = -impulse1;
for(i0 = 0; linkID0!=common; linkID0 = matrix.parent[linkID0])
{
Z0 = Fns::propagateImpulse(rows[linkID0], jointVectors[linkID0], SZ[linkID0], Z0, aux[linkID0]);
stack[i0++] = linkID0;
}
for(i1 = i0; linkID1!=common; linkID1 = matrix.parent[linkID1])
{
Z1 = Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ[linkID1], Z1, aux[linkID1]);
stack[i1++] = linkID1;
}
Cm::SpatialVectorV Z = Z0 + Z1;
for(ic = i1; common; common = matrix.parent[common])
{
Z = Fns::propagateImpulse(rows[common], jointVectors[common], SZ[common], Z, aux[common]);
stack[ic++] = common;
}
Cm::SpatialVectorV v = Fns::multiply(getRootInverseInertia(matrix), -Z);
for(PxU32 index = ic; index-->i1 ;)
v = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], v, aux[stack[index]]);
deltaV1 = v;
for(PxU32 index = i1; index-->i0 ;)
deltaV1 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV1, aux[stack[index]]);
deltaV0 = v;
for(PxU32 index = i0; index-->0;)
deltaV0 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV0, aux[stack[index]]);
}
void PxcFsGetImpulseResponse(const FsData& matrix,
PxU32 linkID, const Cm::SpatialVectorV& impulse, Cm::SpatialVectorV& deltaV)
{
typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
const FsRow* rows = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
Cm::SpatialVectorV Z = -impulse;
for(PxU32 i = linkID; i; i = matrix.parent[i])
Z = Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z, aux[i]);
deltaV = Fns::multiply(getRootInverseInertia(matrix), -Z);
PX_ASSERT(rows[linkID].pathToRoot&1);
for(ArticulationBitField i=rows[linkID].pathToRoot-1; i; i &= (i-1))
{
const PxU32 index = ArticulationLowestSetBit(i);
deltaV = Fns::propagateVelocity(rows[index], jointVectors[index], SZ[index], deltaV, aux[index]);
}
}
void PxcFsGetImpulseSelfResponse(const FsData& matrix,
PxU32 linkID0, const Cm::SpatialVectorV& impulse0, Cm::SpatialVectorV& deltaV0,
PxU32 linkID1, const Cm::SpatialVectorV& impulse1, Cm::SpatialVectorV& deltaV1)
{
typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
PX_ASSERT(linkID0 != linkID1);
const FsRow* rows = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
// standard case: parent-child limit
if(matrix.parent[linkID1] == linkID0)
{
Vec3V SZ;
const Cm::SpatialVectorV Z = impulse0 - Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ, -impulse1, aux[linkID1]);
PxcFsGetImpulseResponse(matrix, linkID0, Z, deltaV0);
deltaV1 = Fns::propagateVelocity(rows[linkID1], jointVectors[linkID1], SZ, deltaV0, aux[linkID1]);
}
else
getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);
#if DY_ARTICULATION_DEBUG_VERIFY
Cm::SpatialVector V[DY_ARTICULATION_MAX_SIZE];
for(PxU32 i=0;i<matrix.linkCount;i++) V[i] = Cm::SpatialVector::zero();
ArticulationRef::applyImpulse(matrix,V,linkID0, reinterpret_cast<const Cm::SpatialVector&>(impulse0));
ArticulationRef::applyImpulse(matrix,V,linkID1, reinterpret_cast<const Cm::SpatialVector&>(impulse1));
Cm::SpatialVector refV0 = V[linkID0];
Cm::SpatialVector refV1 = V[linkID1];
#endif
}
namespace
{
PX_FORCE_INLINE Cm::SpatialVectorV getImpulseResponseSimd(const FsData& matrix, PxU32 linkID, Vec3V lZ, Vec3V aZ)
{
PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
PxU32 indices[DY_ARTICULATION_MAX_SIZE], iCount = 0;
const FsRow*PX_RESTRICT rows = getFsRows(matrix);
const FsRowAux*PX_RESTRICT aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
PX_UNUSED(aux);
PX_ASSERT(rows[linkID].pathToRoot&1);
lZ = V3Neg(lZ);
aZ = V3Neg(aZ);
for(PxU32 i = linkID; i; i = matrix.parent[i])
{
const FsRow& r = rows[i];
const FsJointVectors& j = jointVectors[i];
Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
SZ[iCount] = sz;
lZ = V3NegScaleSub(r.DSI[0].linear, V3GetX(sz), V3NegScaleSub(r.DSI[1].linear, V3GetY(sz), V3NegScaleSub(r.DSI[2].linear, V3GetZ(sz), lZ)));
aZ = V3NegScaleSub(r.DSI[0].angular, V3GetX(sz), V3NegScaleSub(r.DSI[1].angular, V3GetY(sz), V3NegScaleSub(r.DSI[2].angular, V3GetZ(sz), aZ)));
aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));
indices[iCount++] = i;
}
const FsInertia& I = getRootInverseInertia(matrix);
Vec3V lV = V3Neg(V3Add(M33MulV3(I.ll, lZ), M33MulV3(I.la, aZ)));
Vec3V aV = V3Neg(V3Add(M33TrnspsMulV3(I.la, lZ), M33MulV3(I.aa, aZ)));
while(iCount)
{
PxU32 i = indices[--iCount];
const FsRow& r = rows[i];
const FsJointVectors& j = jointVectors[i];
lV = V3Sub(lV, V3Cross(j.parentOffset, aV));
Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)),
V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));
n = V3Add(n, M33MulV3(r.D, SZ[iCount]));
lV = V3Sub(lV, V3Cross(j.jointOffset, n));
aV = V3Sub(aV, n);
}
return Cm::SpatialVectorV(lV, aV);
}
}
void ArticulationHelper::getImpulseResponse(const FsData& matrix,
PxU32 linkID, const Cm::SpatialVectorV& impulse, Cm::SpatialVectorV& deltaV)
{
PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
deltaV = getImpulseResponseSimd(matrix, linkID, impulse.linear, impulse.angular);
#if DY_ARTICULATION_DEBUG_VERIFY
Cm::SpatialVectorV deltaV_;
PxcFsGetImpulseResponse(matrix, linkID, impulse, deltaV_);
PX_ASSERT(almostEqual(deltaV_, deltaV,1e-3f));
#endif
}
void ArticulationHelper::getImpulseSelfResponse(const FsData& matrix,
PxU32 linkID0, const Cm::SpatialVectorV& impulse0, Cm::SpatialVectorV& deltaV0,
PxU32 linkID1, const Cm::SpatialVectorV& impulse1, Cm::SpatialVectorV& deltaV1)
{
PX_ASSERT(linkID0 != linkID1);
const FsRow* rows = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
PX_UNUSED(aux);
Cm::SpatialVectorV& dV0 = deltaV0,
& dV1 = deltaV1;
// standard case: parent-child limit
if(matrix.parent[linkID1] == linkID0)
{
const FsRow& r = rows[linkID1];
const FsJointVectors& j = jointVectors[linkID1];
Vec3V lZ = V3Neg(impulse1.linear),
aZ = V3Neg(impulse1.angular);
Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
lZ = V3Sub(lZ, V3ScaleAdd(r.DSI[0].linear, V3GetX(sz), V3ScaleAdd(r.DSI[1].linear, V3GetY(sz), V3Scale(r.DSI[2].linear, V3GetZ(sz)))));
aZ = V3Sub(aZ, V3ScaleAdd(r.DSI[0].angular, V3GetX(sz), V3ScaleAdd(r.DSI[1].angular, V3GetY(sz), V3Scale(r.DSI[2].angular, V3GetZ(sz)))));
aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));
lZ = V3Sub(impulse0.linear, lZ);
aZ = V3Sub(impulse0.angular, aZ);
dV0 = getImpulseResponseSimd(matrix, linkID0, lZ, aZ);
Vec3V aV = dV0.angular;
Vec3V lV = V3Sub(dV0.linear, V3Cross(j.parentOffset, aV));
Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)),
V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));
n = V3Add(n, M33MulV3(r.D, sz));
lV = V3Sub(lV, V3Cross(j.jointOffset, n));
aV = V3Sub(aV, n);
dV1 = Cm::SpatialVectorV(lV, aV);
}
else
{
#if 0
getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);
#else
getImpulseResponse(matrix, linkID0, impulse0, deltaV0);
getImpulseResponse(matrix, linkID1, impulse1, deltaV1);
#endif
}
#if DY_ARTICULATION_DEBUG_VERIFY
Cm::SpatialVectorV dV0_, dV1_;
PxcFsGetImpulseSelfResponse(matrix, linkID0, impulse0, dV0_, linkID1, impulse1, dV1_);
PX_ASSERT(almostEqual(dV0_, dV0, 1e-3f));
PX_ASSERT(almostEqual(dV1_, dV1, 1e-3f));
#endif
}
PxU32 ArticulationHelper::getFsDataSize(PxU32 linkCount)
{
return sizeof(FsInertia) + sizeof(FsRow) * linkCount;
}
PxU32 ArticulationHelper::getLtbDataSize(PxU32 linkCount)
{
return sizeof(LtbRow) * linkCount;
}
void ArticulationHelper::createHardLimit( const FsData& fsData,
const ArticulationLink* links,
PxU32 linkIndex,
SolverConstraint1DExt& s,
const PxVec3& axis,
PxReal err,
PxReal recipDt)
{
init(s, PxVec3(0), PxVec3(0), axis, axis, 0, PX_MAX_F32);
ArticulationHelper::getImpulseSelfResponse(fsData,
links[linkIndex].parent,Cm::SpatialVector(PxVec3(0), axis), s.deltaVA,
linkIndex, Cm::SpatialVector(PxVec3(0), -axis), s.deltaVB);
const PxReal unitResponse = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular)) - axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
if(unitResponse<0.0f)
Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, joint limit ignored");
const PxReal recipResponse = unitResponse>0.0f ? 1.0f/unitResponse : 0.0f;
s.constant = recipResponse * -err * recipDt;
s.unbiasedConstant = err>0.0f ? s.constant : 0.0f;
s.velMultiplier = -recipResponse;
s.impulseMultiplier = 1.0f;
}
void ArticulationHelper::createTangentialSpring(const FsData& fsData,
const ArticulationLink* links,
PxU32 linkIndex,
SolverConstraint1DExt& s,
const PxVec3& axis,
PxReal stiffness,
PxReal damping,
PxReal dt)
{
init(s, PxVec3(0), PxVec3(0), axis, axis, -PX_MAX_F32, PX_MAX_F32);
Cm::SpatialVector axis6(PxVec3(0), axis);
PxU32 parent = links[linkIndex].parent;
getImpulseSelfResponse(fsData, parent, axis6, s.deltaVA, linkIndex, -axis6, s.deltaVB);
const PxReal unitResponse = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular)) - axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
if(unitResponse<0.0f)
Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, tangential spring ignored");
const PxReal recipResponse = unitResponse>0.0F ? 1.0f/unitResponse : 0.0f;
// this is a specialization of the spring code in setSolverConstants() for acceleration springs.
// general case is b = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * geomError);
// but geomError and velocityTarget are both zero
const PxReal a = dt * dt * stiffness + dt * damping;
const PxReal x = 1.0f/(1.0f+a);
s.constant = s.unbiasedConstant = 0.0f;
s.velMultiplier = -x * recipResponse * a;
s.impulseMultiplier = 1.0f - x;
}
PxU32 ArticulationHelper::setupSolverConstraints( Articulation& articulation, PxU32 /*solverDataSize*/,
PxConstraintAllocator& allocator,
PxSolverConstraintDesc* constraintDesc,
const ArticulationLink* links,
const ArticulationJointTransforms* jointTransforms,
PxReal dt,
PxU32& acCount)
{
acCount = 0;
FsData& fsData = *articulation.getFsDataPtr();
const PxU16 linkCount = fsData.linkCount;
PxU32 descCount = 0;
const PxReal recipDt = 1.0f/dt;
const PxConstraintInvMassScale ims(1.0f, 1.0f, 1.0f, 1.0f);
for(PxU16 i=1;i<linkCount;i++)
{
const ArticulationJointCore& j = static_cast<const ArticulationJointCore&>(*links[i].inboundJoint);
if(i+1<linkCount)
{
Ps::prefetch(links[i+1].inboundJoint, sizeof (ArticulationJointCore));
Ps::prefetch(&jointTransforms[i+1], sizeof(ArticulationJointTransforms));
}
if(!(j.twistLimited || j.swingLimited))
continue;
PxQuat swing, twist;
Ps::separateSwingTwist(jointTransforms[i].cB2cA.q, swing, twist);
Cm::ConeLimitHelper eh(j.tanQSwingY, j.tanQSwingZ, j.tanQSwingPad);
PxVec3 swingLimitAxis;
PxReal swingLimitError = 0.0f;
const bool swingLimited = j.swingLimited && eh.getLimit(swing, swingLimitAxis, swingLimitError);
const bool tangentialStiffness = swingLimited && (j.tangentialStiffness>0 || j.tangentialDamping>0);
const PxVec3 twistAxis = jointTransforms[i].cB2w.rotate(PxVec3(1.0f,0,0));
const PxReal tqTwistAngle = Ps::tanHalf(twist.x, twist.w);
const bool twistLowerLimited = j.twistLimited && tqTwistAngle < Cm::tanAdd(j.tanQTwistLow, j.tanQTwistPad);
const bool twistUpperLimited = j.twistLimited && tqTwistAngle > Cm::tanAdd(j.tanQTwistHigh, -j.tanQTwistPad);
const PxU8 constraintCount = PxU8(swingLimited + tangentialStiffness + twistUpperLimited + twistLowerLimited);
if(!constraintCount)
continue;
PxSolverConstraintDesc& desc = constraintDesc[descCount++];
desc.articulationA = &articulation;
desc.linkIndexA = Ps::to16(links[i].parent);
desc.articulationB = &articulation;
desc.linkIndexB = i;
const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) +
sizeof(SolverConstraint1DExt) * constraintCount;
PX_ASSERT(0==(constraintLength & 0x0f));
desc.constraintLengthOver16 = Ps::to16(constraintLength/16);
//desc.constraint = stream.reserve(constraintLength + 16u, constraintBlockManager);
desc.constraint = allocator.reserveConstraintData(constraintLength + 16u);
desc.writeBack = NULL;
SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
SolverConstraint1DExt* constraints = reinterpret_cast<SolverConstraint1DExt*>(desc.constraint + sizeof(SolverConstraint1DHeader));
init(*header, constraintCount, true, ims);
PxU32 cIndex = 0;
if(swingLimited)
{
const PxVec3 normal = jointTransforms[i].cA2w.rotate(swingLimitAxis);
createHardLimit(fsData, links, i, constraints[cIndex++], normal, swingLimitError, recipDt);
if(tangentialStiffness)
{
const PxVec3 tangent = twistAxis.cross(normal).getNormalized();
createTangentialSpring(fsData, links, i, constraints[cIndex++], tangent, j.tangentialStiffness, j.tangentialDamping, dt);
}
}
if(twistUpperLimited)
createHardLimit(fsData, links, i, constraints[cIndex++], twistAxis, (j.tanQTwistHigh - tqTwistAngle)*4, recipDt);
if(twistLowerLimited)
createHardLimit(fsData, links, i, constraints[cIndex++], -twistAxis, -(j.tanQTwistLow - tqTwistAngle)*4, recipDt);
*(desc.constraint + getConstraintLength(desc)) = 0;
PX_ASSERT(cIndex == constraintCount);
acCount += constraintCount;
}
return descCount;
}
ArticulationPImpl::ComputeUnconstrainedVelocitiesFn ArticulationPImpl::sComputeUnconstrainedVelocities[2] = { NULL, NULL };
ArticulationPImpl::UpdateBodiesFn ArticulationPImpl::sUpdateBodies[2] = { NULL, NULL };
ArticulationPImpl::UpdateBodiesFn ArticulationPImpl::sUpdateBodiesTGS[2] = { NULL, NULL };
ArticulationPImpl::SaveVelocityFn ArticulationPImpl::sSaveVelocity[2] = { NULL, NULL };
ArticulationPImpl::SaveVelocityTGSFn ArticulationPImpl::sSaveVelocityTGS[2] = { NULL, NULL };
ArticulationPImpl::UpdateDeltaMotionFn ArticulationPImpl::sUpdateDeltaMotion[2] = { NULL, NULL };
ArticulationPImpl::DeltaMotionToMotionVelFn ArticulationPImpl::sDeltaMotionToMotionVel[2] = { NULL, NULL };
ArticulationPImpl::ComputeUnconstrainedVelocitiesTGSFn ArticulationPImpl::sComputeUnconstrainedVelocitiesTGS[2] = { NULL, NULL };
ArticulationPImpl::SetupInternalConstraintsTGSFn ArticulationPImpl::sSetupInternalConstraintsTGS[2] = { NULL, NULL };
}
}

View File

@ -0,0 +1,209 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_ARTICULATION_HELPER_H
#define DY_ARTICULATION_HELPER_H
#include "DyArticulation.h"
namespace physx
{
struct PxsBodyCore;
struct PxSolverConstraintDesc;
struct PxSolverConstraintDesc;
namespace Dy
{
struct FsInertia;
struct SolverConstraint1DExt;
struct ArticulationJointCore;
struct ArticulationSolverDesc;
struct SolverConstraint1DExtStep;
struct PxcFsScratchAllocator;
struct ArticulationJointTransforms
{
PxTransform cA2w; // joint parent frame in world space
PxTransform cB2w; // joint child frame in world space
PxTransform cB2cA; // joint relative pose in world space
};
class ArticulationHelper
{
public:
static void getImpulseResponse( const FsData& matrix,
PxU32 linkID,
const Cm::SpatialVectorV& impulse,
Cm::SpatialVectorV& deltaV);
static PX_FORCE_INLINE
void getImpulseResponse( const FsData& matrix,
PxU32 linkID,
const Cm::SpatialVector& impulse,
Cm::SpatialVector& deltaV)
{
getImpulseResponse(matrix, linkID, reinterpret_cast<const Cm::SpatialVectorV&>(impulse), reinterpret_cast<Cm::SpatialVectorV&>(deltaV));
}
static void getImpulseSelfResponse(const FsData& matrix,
PxU32 linkID0,
const Cm::SpatialVectorV& impulse0,
Cm::SpatialVectorV& deltaV0,
PxU32 linkID1,
const Cm::SpatialVectorV& impulse1,
Cm::SpatialVectorV& deltaV1);
//static void flushVelocity(FsData& matrix);
static void saveVelocity(const ArticulationSolverDesc& m, Cm::SpatialVectorF* deltaV);
static void saveVelocityTGS(const ArticulationSolverDesc& m, const PxReal invDt);
//static void recordDeltaMotion(const ArticulationSolverDesc& m, PxReal dt);
//static void deltaMotionToMotionVelocity(const ArticulationSolverDesc& m, PxReal invDt);
//static void getDataSizes(PxU32 linkCount, PxU32 &solverDataSize, PxU32& totalSize, PxU32& scratchSize);
/*static void initializeDriveCache(Articulation& articulation,
FsData &data,
PxU16 linkCount,
const ArticulationLink* links,
PxReal compliance,
PxU32 iterations,
char* scratchMemory,
PxU32 scratchMemorySize);*/
//static PxU32 getDriveCacheLinkCount(const FsData& cache);
/*static void applyImpulses(const FsData& matrix,
Cm::SpatialVectorV* Z,
Cm::SpatialVectorV* V);*/
//private:
static PxU32 getLtbDataSize(PxU32 linkCount);
static PxU32 getFsDataSize(PxU32 linkCount);
//static void prepareDataBlock(ArticulationV& articualtion,
// const ArticulationLink* links,
// PxU16 linkCount,
// PxTransform* poses,
// FsInertia *baseInertia,
// ArticulationJointTransforms* jointTransforms,
// PxU32 expectedSize);
//static void setInertia(FsInertia& inertia,
// const PxsBodyCore& body,
// const PxTransform& pose);
//static void setJointTransforms(ArticulationJointTransforms& transforms,
// const PxTransform& parentPose,
// const PxTransform& childPose,
// const ArticulationJointCore& joint);
/*static void prepareLtbMatrix(FsData& fsData,
const FsInertia* baseInertia,
const PxTransform* poses,
const ArticulationJointTransforms* jointTransforms,
PxReal recipDt);*/
//static void prepareFsData(FsData& fsData,
// const ArticulationLink* links);
//static PX_FORCE_INLINE PxReal getResistance(PxReal compliance);
static void createHardLimit(const FsData& fsData,
const ArticulationLink* links,
PxU32 linkIndex,
SolverConstraint1DExt& s,
const PxVec3& axis,
PxReal err,
PxReal recipDt);
static void createTangentialSpring(const FsData& fsData,
const ArticulationLink* links,
PxU32 linkIndex,
SolverConstraint1DExt& s,
const PxVec3& axis,
PxReal stiffness,
PxReal damping,
PxReal dt);
static void createHardLimitTGS(const FsData& fsData,
const ArticulationLink* links,
PxU32 linkIndex,
SolverConstraint1DExtStep& s,
const PxVec3& axis,
PxReal err,
PxReal recipDt);
static void createTangentialSpringTGS(const FsData& fsData,
const ArticulationLink* links,
PxU32 linkIndex,
SolverConstraint1DExtStep& s,
const PxVec3& axis,
PxReal stiffness,
PxReal damping,
PxReal dt);
static PxU32 setupSolverConstraints(Articulation& articulation, PxU32 solverDataSize,
PxConstraintAllocator& allocator,
PxSolverConstraintDesc* constraintDesc,
const ArticulationLink* links,
const ArticulationJointTransforms* jointTransforms,
PxReal dt,
PxU32& acCount);
/*static void computeUnconstrainedVelocitiesInternal(const ArticulationSolverDesc& desc,
PxReal dt,
const PxVec3& gravity, PxU64 contextID,
FsInertia* PX_RESTRICT baseInertia,
ArticulationJointTransforms* PX_RESTRICT jointTransforms,
PxcFsScratchAllocator& allocator);*/
/*static void computeJointDrives(FsData& fsData,
Ps::aos::Vec3V* drives,
const ArticulationLink* links,
const PxTransform* poses,
const ArticulationJointTransforms* transforms,
const Ps::aos::Mat33V* loads,
PxReal dt);*/
};
}
}
#endif //DY_ARTICULATION_HELPER_H

View File

@ -0,0 +1,202 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_ARTICULATION_INTERFACE_H
#define DY_ARTICULATION_INTERFACE_H
#include "DyArticulationUtils.h"
namespace physx
{
class PxcConstraintBlockStream;
class PxcScratchAllocator;
class PxsConstraintBlockManager;
struct PxSolverConstraintDesc;
namespace Dy
{
struct ArticulationSolverDesc;
class ArticulationPImpl
{
public:
typedef PxU32 (*ComputeUnconstrainedVelocitiesFn)(const ArticulationSolverDesc& desc,
PxReal dt,
PxConstraintAllocator& allocator,
PxSolverConstraintDesc* constraintDesc,
PxU32& acCount,
const PxVec3& gravity, PxU64 contextID,
Cm::SpatialVectorF* Z, Cm::SpatialVectorF* DeltaV);
typedef void (*UpdateBodiesFn)(const ArticulationSolverDesc& desc, PxReal dt);
typedef void (*SaveVelocityFn)(const ArticulationSolverDesc &m, Cm::SpatialVectorF* deltaV);
typedef void(*SaveVelocityTGSFn)(const ArticulationSolverDesc& m, PxReal invDtF32);
typedef PxU32(*SetupInternalConstraintsTGSFn)(const ArticulationSolverDesc& desc,
PxcConstraintBlockStream& stream,
PxSolverConstraintDesc* constraintDesc,
PxReal dt,
PxReal invDt,
PxReal totalDt,
PxU32& acCount,
PxsConstraintBlockManager& constraintBlockManager,
Cm::SpatialVectorF* Z);
typedef void(*ComputeUnconstrainedVelocitiesTGSFn)(const ArticulationSolverDesc& desc,
PxReal dt,
const PxVec3& gravity, PxU64 contextID, Cm::SpatialVectorF* Z, Cm::SpatialVectorF* DeltaV);
typedef void(*UpdateDeltaMotionFn)(const ArticulationSolverDesc &m, const PxReal dt, Cm::SpatialVectorF* DeltaV, const PxReal totalInvDt);
typedef void(*DeltaMotionToMotionVelFn)(const ArticulationSolverDesc &m, const PxReal dt);
static ComputeUnconstrainedVelocitiesFn sComputeUnconstrainedVelocities[2];
static UpdateBodiesFn sUpdateBodies[2];
static UpdateBodiesFn sUpdateBodiesTGS[2];
static SaveVelocityFn sSaveVelocity[2];
static SaveVelocityTGSFn sSaveVelocityTGS[2];
static UpdateDeltaMotionFn sUpdateDeltaMotion[2];
static DeltaMotionToMotionVelFn sDeltaMotionToMotionVel[2];
static ComputeUnconstrainedVelocitiesTGSFn sComputeUnconstrainedVelocitiesTGS[2];
static SetupInternalConstraintsTGSFn sSetupInternalConstraintsTGS[2];
static PxU32 computeUnconstrainedVelocities(const ArticulationSolverDesc& desc,
PxReal dt,
PxConstraintAllocator& allocator,
PxSolverConstraintDesc* constraintDesc,
PxU32& acCount,
PxcScratchAllocator&,
const PxVec3& gravity, PxU64 contextID,
Cm::SpatialVectorF* Z, Cm::SpatialVectorF* deltaV)
{
PxU32 type = desc.articulation->getType();
PX_ASSERT(sComputeUnconstrainedVelocities[type]);
if (sComputeUnconstrainedVelocities[type])
return (sComputeUnconstrainedVelocities[type])(desc, dt, allocator, constraintDesc, acCount,
gravity, contextID, Z, deltaV);
else
return 0;
}
static void updateBodies(const ArticulationSolverDesc& desc,
PxReal dt)
{
PxU32 type = desc.articulation->getType();
PX_ASSERT(sUpdateBodies[type]);
if (sUpdateBodies[type])
(*sUpdateBodies[type])(desc, dt);
}
static void updateBodiesTGS(const ArticulationSolverDesc& desc,
PxReal dt)
{
PxU32 type = desc.articulation->getType();
PX_ASSERT(sUpdateBodiesTGS[type]);
if (sUpdateBodiesTGS[type])
(*sUpdateBodiesTGS[type])(desc, dt);
}
static void saveVelocity(const ArticulationSolverDesc& desc, Cm::SpatialVectorF* deltaV)
{
PxU32 type = desc.articulation->getType();
PX_ASSERT(sSaveVelocity[type]);
if (sSaveVelocity[type])
(*sSaveVelocity[type])(desc, deltaV);
}
static void saveVelocityTGS(const ArticulationSolverDesc& desc, PxReal invDtF32)
{
PX_UNUSED(desc);
PX_UNUSED(invDtF32);
PxU32 type = desc.articulation->getType();
PX_ASSERT(sSaveVelocityTGS[type]);
if (sSaveVelocityTGS[type])
(*sSaveVelocityTGS[type])(desc, invDtF32);
}
static void computeUnconstrainedVelocitiesTGS(const ArticulationSolverDesc& desc,
PxReal dt,
const PxVec3& gravity, PxU64 contextID, Cm::SpatialVectorF* Z, Cm::SpatialVectorF* DeltaV)
{
PxU32 type = desc.articulation->getType();
PX_ASSERT(sComputeUnconstrainedVelocitiesTGS[type]);
if (sComputeUnconstrainedVelocitiesTGS[type])
(sComputeUnconstrainedVelocitiesTGS[type])(desc, dt, gravity, contextID, Z, DeltaV);
}
static void updateDeltaMotion(const ArticulationSolverDesc& desc, const PxReal dt, Cm::SpatialVectorF* DeltaV, const PxReal totalInvDt)
{
PxU32 type = desc.articulation->getType();
PX_ASSERT(sUpdateDeltaMotion[type]);
if (sUpdateDeltaMotion[type])
(*sUpdateDeltaMotion[type])(desc, dt, DeltaV, totalInvDt);
}
static void deltaMotionToMotionVel(const ArticulationSolverDesc& desc, const PxReal invDt)
{
PxU32 type = desc.articulation->getType();
PX_ASSERT(sDeltaMotionToMotionVel[type]);
if (sDeltaMotionToMotionVel[type])
(*sDeltaMotionToMotionVel)(desc, invDt);
}
static PxU32 setupSolverInternalConstraintsTGS(const ArticulationSolverDesc& desc,
PxcConstraintBlockStream& stream,
PxSolverConstraintDesc* constraintDesc,
PxReal dt,
PxReal invDt,
PxReal totalDt,
PxU32& acCount,
PxsConstraintBlockManager& constraintBlockManager,
Cm::SpatialVectorF* Z)
{
PxU32 type = desc.articulation->getType();
PX_ASSERT(sSetupInternalConstraintsTGS[type]);
if (sSetupInternalConstraintsTGS[type])
return sSetupInternalConstraintsTGS[type](desc, stream, constraintDesc, dt, invDt, totalDt, acCount, constraintBlockManager, Z);
return 0;
}
};
}
}
#endif //DY_ARTICULATION_INTERFACE_H

View File

@ -0,0 +1,92 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_ARTICULATION_REFERENCE_H
#define DY_ARTICULATION_REFERENCE_H
// a per-row struct where we put extra data for debug and setup - ultimately this will move to be just
// debug only
#include "DyArticulationUtils.h"
#include "DyArticulationScalar.h"
#include "DyArticulationFnsScalar.h"
#include "DySpatial.h"
#if DY_ARTICULATION_DEBUG_VERIFY
namespace physx
{
PX_FORCE_INLINE Cm::SpatialVector propagateVelocity(const FsRow& row,
const FsJointVectors& jv,
const PxVec3& SZ,
const Cm::SpatialVector& v,
const FsRowAux& aux)
{
typedef ArticulationFnsScalar Fns;
Cm::SpatialVector w = Fns::translateMotion(-getParentOffset(jv), v);
PxVec3 DSZ = Fns::multiply(row.D, SZ);
PxVec3 n = Fns::axisDot(getDSI(row), w) + DSZ;
Cm::SpatialVector result = w - Cm::SpatialVector(getJointOffset(jv).cross(n),n);
#if DY_ARTICULATION_DEBUG_VERIFY
Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, SZ, v, aux);
PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f));
#endif
return result;
}
PX_FORCE_INLINE Cm::SpatialVector propagateImpulse(const FsRow& row,
const FsJointVectors& jv,
PxVec3& SZ,
const Cm::SpatialVector& Z,
const FsRowAux& aux)
{
typedef ArticulationFnsScalar Fns;
SZ = Z.angular + Z.linear.cross(getJointOffset(jv));
Cm::SpatialVector result = Fns::translateForce(getParentOffset(jv), Z - Fns::axisMultiply(getDSI(row), SZ));
#if DY_ARTICULATION_DEBUG_VERIFY
PxVec3 SZcheck;
Cm::SpatialVector check = ArticulationRef::propagateImpulse(row, jv, SZcheck, Z, aux);
PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f));
PX_ASSERT((SZ-SZcheck).magnitude()<1e-5*PxMax(SZcheck.magnitude(), 1.0f));
#endif
return result;
}
}
#endif
#endif //DY_ARTICULATION_REFERENCE_H

View File

@ -0,0 +1,317 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "DySpatial.h"
#include "DyArticulation.h"
#include "DyArticulationScalar.h"
#include "DyArticulationFnsScalar.h"
#include "DyArticulationReference.h"
#include "DyArticulationFnsSimd.h"
namespace physx
{
namespace Dy
{
#if DY_ARTICULATION_DEBUG_VERIFY
namespace
{
Cm::SpatialVector SpV(Vec3V linear, Vec3V angular)
{
return Cm::SpatialVector((PxVec3 &)linear, (PxVec3&)angular);
}
}
#endif
//void PxcFsApplyImpulse(ArticulationV &articulationV,
// PxU32 linkID,
// Vec3V linear,
// Vec3V angular)
//{
//#if DY_ARTICULATION_DEBUG_VERIFY
// {
// Cm::SpatialVectorV imp(linear, angular);
// ArticulationRef::applyImpulse(matrix, reinterpret_cast<Cm::SpatialVector *>(getRefVelocity(matrix)), linkID, reinterpret_cast<Cm::SpatialVector&>(imp));
// }
//#endif
// Articulation& articulation = static_cast<>(articulationV
// FsData& matrix = *articulation.getFsDataPtr();
// Vec3V linZ = V3Neg(linear);
// Vec3V angZ = V3Neg(angular);
//
// const FsRow *rows = getFsRows(matrix);
// const FsJointVectors* jointVectors = getJointVectors(matrix);
//
//#if DY_ARTICULATION_DEBUG_VERIFY
// const FsRowAux *aux = getAux(matrix);
//#endif
// Vec3V *deferredSZ = getDeferredSZ(matrix);
//
// for(PxU32 i = linkID; i!=0; i = matrix.parent[i])
// {
// const FsRow &row = rows[i];
// const FsJointVectors& jv = jointVectors[i];
//
//#if DY_ARTICULATION_DEBUG_VERIFY
// PxVec3 SZcheck;
// Cm::SpatialVector Zcheck = ArticulationRef::propagateImpulse(row, jv, SZcheck, SpV(linZ, angZ), aux[i]);
//#endif
//
// Vec3V SZ = V3Add(angZ, V3Cross(linZ, jv.jointOffset));
// Vec3V lrLinear = V3Sub(linZ, V3ScaleAdd(row.DSI[0].linear, V3GetX(SZ),
// V3ScaleAdd(row.DSI[1].linear, V3GetY(SZ),
// V3Scale(row.DSI[2].linear, V3GetZ(SZ)))));
//
// Vec3V lrAngular = V3Sub(angZ, V3ScaleAdd(row.DSI[0].angular, V3GetX(SZ),
// V3ScaleAdd(row.DSI[1].angular, V3GetY(SZ),
// V3Scale(row.DSI[2].angular, V3GetZ(SZ)))));
//
// linZ = lrLinear;
// angZ = V3Add(lrAngular, V3Cross(jv.parentOffset, lrLinear));
// deferredSZ[i] = V3Add(deferredSZ[i], SZ);
//
// PX_ASSERT(Ps::aos::isFiniteVec3V(linZ));
// PX_ASSERT(Ps::aos::isFiniteVec3V(angZ));
//
//#if DY_ARTICULATION_DEBUG_VERIFY
// Cm::SpatialVector Z = SpV(linZ,angZ);
// PX_ASSERT((Z - Zcheck).magnitude()<1e-4*PxMax(Zcheck.magnitude(), 1.0f));
// PX_ASSERT(((PxVec3&)SZ-SZcheck).magnitude()<1e-4*PxMax(SZcheck.magnitude(), 1.0f));
//#endif
// }
//
// matrix.deferredZ.linear = V3Add(matrix.deferredZ.linear, linZ);
// matrix.deferredZ.angular = V3Add(matrix.deferredZ.angular, angZ);
//
// matrix.dirty |= rows[linkID].pathToRoot;
//}
//Cm::SpatialVectorV PxcFsGetVelocity(Articulation &articulation,
// PxU32 linkID)
//{
// FsData& matrix = *articulation.getFsDataPtr();
// const FsRow *rows = getFsRows(matrix);
// const FsJointVectors* jointVectors = getJointVectors(matrix);
//
//#if DY_ARTICULATION_DEBUG_VERIFY
// const FsRowAux *aux = getAux(matrix);
//#endif
// Cm::SpatialVectorV* PX_RESTRICT V = getVelocity(matrix);
//
// // find the dirty node on the path (including the root) with the lowest index
// ArticulationBitField toUpdate = rows[linkID].pathToRoot & matrix.dirty;
//
//
// if(toUpdate)
// {
// // store the dV elements densely and use an array map to decode - hopefully cache friendlier
// PxU32 indexToStackLoc[DY_ARTICULATION_MAX_SIZE], count = 0;
// Cm::SpatialVectorV dVStack[DY_ARTICULATION_MAX_SIZE];
//
// ArticulationBitField ignoreNodes = (toUpdate & (0-toUpdate))-1;
// ArticulationBitField path = rows[linkID].pathToRoot & ~ignoreNodes, p = path;
// ArticulationBitField newDirty = 0;
//
// Vec3V ldV = V3Zero(), adV = V3Zero();
// Cm::SpatialVectorV* PX_RESTRICT defV = getDeferredVel(matrix);
// Vec3V* PX_RESTRICT SZ = getDeferredSZ(matrix);
//
// if(p & 1)
// {
// const FsInertia &m = getRootInverseInertia(matrix);
// Vec3V lZ = V3Neg(matrix.deferredZ.linear);
// Vec3V aZ = V3Neg(matrix.deferredZ.angular);
//
// ldV = V3Add(M33MulV3(m.ll,lZ), M33MulV3(m.la,aZ));
// adV = V3Add(M33TrnspsMulV3(m.la,lZ), M33MulV3(m.aa,aZ));
//
// V[0].linear = V3Add(V[0].linear, ldV);
// V[0].angular = V3Add(V[0].angular, adV);
//
// matrix.deferredZ.linear = V3Zero();
// matrix.deferredZ.angular = V3Zero();
//
// indexToStackLoc[0] = count;
// Cm::SpatialVectorV &e = dVStack[count++];
//
// e.linear = ldV;
// e.angular = adV;
//
// newDirty = rows[0].children;
// p--;
// }
//
//
// while(p) // using "for(;p;p &= (p-1))" here generates LHSs from the ArticulationLowestSetBit
// {
// PxU32 i = ArticulationLowestSetBit(p);
// const FsJointVectors& jv = jointVectors[i];
//
// p &= (p-1);
//
// const FsRow* PX_RESTRICT row = rows + i;
//
// ldV = V3Add(ldV, defV[i].linear);
// adV = V3Add(adV, defV[i].angular);
//
//#if DY_ARTICULATION_DEBUG_VERIFY
// Cm::SpatialVector dVcheck = ArticulationRef::propagateVelocity(*row, jv, (PxVec3&)SZ[i], SpV(ldV,adV), aux[i]);
//#endif
//
// Vec3V DSZ = M33MulV3(row->D, SZ[i]);
//
// Vec3V lW = V3Add(ldV, V3Cross(adV,jv.parentOffset));
// Vec3V aW = adV;
//
// const Cm::SpatialVectorV*PX_RESTRICT DSI = row->DSI;
// Vec3V lN = V3Merge(V3Dot(DSI[0].linear, lW), V3Dot(DSI[1].linear, lW), V3Dot(DSI[2].linear, lW));
// Vec3V aN = V3Merge(V3Dot(DSI[0].angular, aW), V3Dot(DSI[1].angular, aW), V3Dot(DSI[2].angular, aW));
//
// Vec3V n = V3Add(V3Add(lN, aN), DSZ);
//
// ldV = V3Sub(lW, V3Cross(jv.jointOffset,n));
// adV = V3Sub(aW, n);
//
//#if DY_ARTICULATION_DEBUG_VERIFY
// Cm::SpatialVector dV = SpV(ldV,adV);
// PX_ASSERT((dV-dVcheck).magnitude()<1e-4*PxMax(dVcheck.magnitude(), 1.0f));
//#endif
//
// V[i].linear = V3Add(V[i].linear, ldV);
// V[i].angular = V3Add(V[i].angular, adV);
//
// defV[i].linear = V3Zero();
// defV[i].angular = V3Zero();
// SZ[i] = V3Zero();
//
// indexToStackLoc[i] = count;
// Cm::SpatialVectorV &e = dVStack[count++];
// newDirty |= rows[i].children;
//
// e.linear = ldV;
// e.angular = adV;
// }
//
// for(ArticulationBitField defer = newDirty&~path; defer; defer &= (defer-1))
// {
// PxU32 i = ArticulationLowestSetBit(defer);
// PxU32 parent = indexToStackLoc[matrix.parent[i]];
//
// defV[i].linear = V3Add(defV[i].linear, dVStack[parent].linear);
// defV[i].angular = V3Add(defV[i].angular, dVStack[parent].angular);
// }
//
// matrix.dirty = (matrix.dirty | newDirty)&~path;
// }
//#if DY_ARTICULATION_DEBUG_VERIFY
// Cm::SpatialVector v = reinterpret_cast<Cm::SpatialVector&>(V[linkID]);
// Cm::SpatialVector rv = reinterpret_cast<Cm::SpatialVector&>(getRefVelocity(matrix)[linkID]);
// PX_ASSERT((v-rv).magnitude()<1e-4f * PxMax(rv.magnitude(),1.0f));
//#endif
//
// return V[linkID];
//}
Cm::SpatialVectorV PxcFsGetMotionVector(ArticulationV& articulation,
PxU32 linkID)
{
/*const Cm::SpatialVectorV* PX_RESTRICT V = articulation.getMotionVector();
return V[linkID];*/
return articulation.getLinkMotionVector(linkID);
}
PX_FORCE_INLINE Cm::SpatialVectorV propagateVelocitySIMD(const FsRow& row,
const FsJointVectors& jv,
const Vec3V& SZ,
const Cm::SpatialVectorV& v,
const FsRowAux& aux)
{
PX_UNUSED(aux);
typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
Cm::SpatialVectorV w(V3Add(v.linear, V3Cross(v.angular, jv.parentOffset)), v.angular);
Vec3V DSZ = M33MulV3(row.D, SZ);
Vec3V n = V3Add(Fns::axisDot(row.DSI, w), DSZ);
Cm::SpatialVectorV result = w - Cm::SpatialVectorV(V3Cross(jv.jointOffset,n), n);
#if DY_ARTICULATION_DEBUG_VERIFY
Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, reinterpret_cast<const PxVec3&>(SZ), reinterpret_cast<const Cm::SpatialVector&>(v), aux);
PX_ASSERT((reinterpret_cast<const Cm::SpatialVector&>(result)-check).magnitude()<1e-4*PxMax(check.magnitude(), 1.0f));
#endif
return result;
}
void PxcFsFlushVelocity(FsData& matrix)
{
typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
const FsRow* PX_RESTRICT rows = getFsRows(matrix);
const FsRowAux* PX_RESTRICT aux = getAux(matrix);
const FsJointVectors*PX_RESTRICT jointVectors = getJointVectors(matrix);
Cm::SpatialVectorV V0 = Fns::multiply(getRootInverseInertia(matrix), -matrix.deferredZ);
matrix.deferredZ = Cm::SpatialVectorV(PxZero);
getVelocity(matrix)[0] += V0;
for(ArticulationBitField defer = rows[0].children; defer; defer &= (defer-1))
getDeferredVel(matrix)[ArticulationLowestSetBit(defer)] += V0;
for(PxU32 i = 1; i<matrix.linkCount; i++)
{
Cm::SpatialVectorV V = propagateVelocitySIMD(rows[i], jointVectors[i], getDeferredSZ(matrix)[i], getDeferredVel(matrix)[i], aux[i]);
getDeferredVel(matrix)[i] = Cm::SpatialVectorV(PxZero);
getDeferredSZ(matrix)[i] = V3Zero();
getVelocity(matrix)[i] += V;
for(ArticulationBitField defer = rows[i].children; defer; defer &= (defer-1))
getDeferredVel(matrix)[ArticulationLowestSetBit(defer)] += V;
}
#if DY_ARTICULATION_DEBUG_VERIFY
for(PxU32 i=0;i<matrix.linkCount;i++)
{
Cm::SpatialVector v = velocityRef(matrix,i), rv = reinterpret_cast<Cm::SpatialVector&>(getRefVelocity(matrix)[i]);
Cm::SpatialVector diff = v-rv;
PxReal m = rv.magnitude();
PX_UNUSED(m);
PX_ASSERT(diff.magnitude()<1e-4*PxMax(1.0f,m));
}
#endif
matrix.dirty = 0;
}
}
}

View File

@ -0,0 +1,575 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "DyArticulationUtils.h"
#include "DyArticulationScalar.h"
#include "DyArticulationReference.h"
#include "DyArticulationFnsDebug.h"
namespace physx
{
namespace Dy
{
namespace ArticulationRef
{
Cm::SpatialVector propagateImpulse(const FsRow& row,
const FsJointVectors& j,
PxVec3& SZ,
const Cm::SpatialVector& Z,
const FsRowAux& aux)
{
typedef ArticulationFnsScalar Fns;
SZ = Fns::axisDot(reinterpret_cast<const Cm::SpatialVector*>(aux.S), Z);
return Fns::translateForce(getParentOffset(j), Z - Fns::axisMultiply(getDSI(row), SZ));
}
Cm::SpatialVector propagateVelocity(const FsRow& row,
const FsJointVectors& j,
const PxVec3& SZ,
const Cm::SpatialVector& v,
const FsRowAux& aux)
{
typedef ArticulationFnsScalar Fns;
Cm::SpatialVector w = Fns::translateMotion(-getParentOffset(j), v);
PxVec3 DSZ = Fns::multiply(row.D, SZ);
return w - Fns::axisMultiply(reinterpret_cast<const Cm::SpatialVector*>(aux.S), DSZ + Fns::axisDot(getDSI(row), w));
}
void applyImpulse(const FsData& matrix,
Cm::SpatialVector* velocity,
PxU32 linkID,
const Cm::SpatialVector& impulse)
{
typedef ArticulationFnsScalar Fns;
PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
const FsRow* rows = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
Cm::SpatialVector dV[DY_ARTICULATION_MAX_SIZE];
PxVec3 SZ[DY_ARTICULATION_MAX_SIZE];
for(PxU32 i=0;i<matrix.linkCount;i++)
SZ[i] = PxVec3(0);
Cm::SpatialVector Z = -impulse;
for(;linkID!=0; linkID = matrix.parent[linkID])
Z = ArticulationRef::propagateImpulse(rows[linkID], jointVectors[linkID], SZ[linkID], Z, aux[linkID]);
dV[0] = Fns::getRootDeltaV(matrix,-Z);
for(PxU32 i=1;i<matrix.linkCount; i++)
dV[i] = ArticulationRef::propagateVelocity(rows[i], jointVectors[i], SZ[i], dV[matrix.parent[i]], aux[i]);
for(PxU32 i=0;i<matrix.linkCount;i++)
velocity[i] += dV[i];
}
void ltbFactor(FsData& m)
{
typedef ArticulationFnsScalar Fns;
LtbRow* rows = getLtbRows(m);
SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
for(PxU32 i=0;i<m.linkCount;i++)
inertia[i] = ArticulationFnsDebug::unsimdify(rows[i].inertia);
Cm::SpatialVector j[3];
for(PxU32 i=m.linkCount; --i>0;)
{
LtbRow& b = rows[i];
inertia[i] = Fns::invertInertia(inertia[i]);
PxU32 p = m.parent[i];
Cm::SpatialVector* j0 = &reinterpret_cast<Cm::SpatialVector&>(*b.j0),
* j1 = &reinterpret_cast<Cm::SpatialVector&>(*b.j1);
Fns::multiply(j, inertia[i], j1);
PxMat33 jResponse = Fns::invertSym33(-Fns::multiplySym(j, j1));
j1[0] = j[0]; j1[1] = j[1]; j1[2] = j[2];
b.jResponse = Mat33V_From_PxMat33(jResponse);
Fns::multiply(j, j0, jResponse);
inertia[p] = Fns::multiplySubtract(inertia[p], j, j0);
j0[0] = j[0]; j0[1] = j[1]; j0[2] = j[2];
}
rows[0].inertia = Fns::invertInertia(inertia[0]);
for(PxU32 i=1;i<m.linkCount;i++)
rows[i].inertia = inertia[i];
}
}
#if 0
void ltbSolve(const FsData& m,
Vec3V* c, // rhs error to solve for
Cm::SpatialVector* y) // velocity delta output
{
typedef ArticulationFnsScalar Fns;
PxVec4* b = reinterpret_cast<PxVec4*>(c);
const LtbRow* rows = getLtbRows(m);
PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVector));
for(PxU32 i=m.linkCount;i-->1;)
{
PxU32 p = m.parent[i];
const LtbRow& r = rows[i];
b[i] -= PxVec4(Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j1), y[i]),0);
y[p] -= Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j0), b[i].getXYZ());
}
y[0] = Fns::multiply(rows[0].inertia,y[0]);
for(PxU32 i=1; i<m.linkCount; i++)
{
PxU32 p = m.parent[i];
const LtbRow& r = rows[i];
PxVec3 t = Fns::multiply(r.jResponse, b[i].getXYZ()) - Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j0), y[p]);
y[i] = Fns::multiply(r.inertia, y[i]) - Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j1), t);
}
}
void PxcFsPropagateDrivenInertiaScalar(FsData& matrix,
const FsInertia* baseInertia,
const PxReal* isf,
const Mat33V* load)
{
typedef ArticulationFnsScalar Fns;
Cm::SpatialVector IS[3], DSI[3];
PxMat33 D;
FsRow* rows = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
for(PxU32 i=0;i<matrix.linkCount;i++)
inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]);
for(PxU32 i=matrix.linkCount; --i>0;)
{
FsRow& r = rows[i];
const FsRowAux& a = aux[i];
const FsJointVectors& jv = jointVectors[i];
Fns::multiply(IS, inertia[i], &static_cast<const Cm::SpatialVector&>(*a.S));
PX_ALIGN(16, PxMat33) L;
PxMat33_From_Mat33V(load[i], L);
D = Fns::invertSym33(Fns::multiplySym(&static_cast<const Cm::SpatialVector&>(*a.S), IS) + L*isf[i]);
Fns::multiply(DSI, IS, D);
r.D = Mat33V_From_PxMat33(D);
static_cast<Cm::SpatialVector&>(r.DSI[0]) = DSI[0];
static_cast<Cm::SpatialVector&>(r.DSI[1]) = DSI[1];
static_cast<Cm::SpatialVector&>(r.DSI[2]) = DSI[2];
inertia[matrix.parent[i]] += Fns::translate(getParentOffset(jv), Fns::multiplySubtract(inertia[i], DSI, IS));
}
FsInertia& m = getRootInverseInertia(matrix);
m = FsInertia(Fns::invertInertia(inertia[0]));
}
// no need to compile this ecxcept for verification, and it consumes huge amounts of stack space
void PxcFsComputeJointLoadsScalar(const FsData& matrix,
const FsInertia*PX_RESTRICT baseInertia,
Mat33V*PX_RESTRICT load,
const PxReal*PX_RESTRICT isf,
PxU32 linkCount,
PxU32 maxIterations)
{
typedef ArticulationFnsScalar Fns;
// the childward S
SpInertia leafwardInertia[DY_ARTICULATION_MAX_SIZE];
SpInertia rootwardInertia[DY_ARTICULATION_MAX_SIZE];
SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
SpInertia contribToParent[DY_ARTICULATION_MAX_SIZE];
// total articulated inertia assuming the articulation is rooted here
const FsRow* row = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
PX_UNUSED(row);
PxMat33 load_[DY_ARTICULATION_MAX_SIZE];
for(PxU32 iter=0;iter<maxIterations;iter++)
{
for(PxU32 i=0;i<linkCount;i++)
inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]);
for(PxU32 i=linkCount;i-->1;)
{
const FsJointVectors& j = jointVectors[i];
leafwardInertia[i] = inertia[i];
contribToParent[i] = Fns::propagate(inertia[i], &static_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]);
inertia[matrix.parent[i]] += Fns::translate((PxVec3&)j.parentOffset, contribToParent[i]);
}
for(PxU32 i=1;i<linkCount;i++)
{
rootwardInertia[i] = Fns::translate(-(PxVec3&)jointVectors[i].parentOffset, inertia[matrix.parent[i]]) - contribToParent[i];
inertia[i] += Fns::propagate(rootwardInertia[i], &static_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]);
}
for(PxU32 i=1;i<linkCount;i++)
{
load_[i] = Fns::computeDriveInertia(leafwardInertia[i], rootwardInertia[i], &static_cast<const Cm::SpatialVector&>(*aux[i].S));
PX_ASSERT(load_[i][0].isFinite() && load_[i][1].isFinite() && load_[2][i].isFinite());
}
}
for(PxU32 i=1;i<linkCount;i++)
load[i] = Mat33V_From_PxMat33(load_[i]);
}
void PxcFsApplyImpulse(const FsData& matrix,
PxU32 linkID,
const Cm::SpatialVector& impulse)
{
#if DY_ARTICULATION_DEBUG_VERIFY
PxcFsRefApplyImpulse(matrix, state.refVelocity, linkID, impulse);
#endif
Cm::SpatialVector Z = -impulse;
for(PxU32 i = linkID; i!=0; i = matrix.row[i].parent)
{
PxVec3 SZ;
Z = propagateImpulse(matrix.row[i], SZ, Z, matrix.aux[i]);
deferredSZRef(state,i) += SZ;
}
static_cast<Cm::SpatialVector &>(state.deferredZ) += Z;
state.dirty |= matrix.row[linkID].pathToRoot;
}
Cm::SpatialVector PxcFsGetVelocity(const FsData& matrix,
PxU32 linkID)
{
// find the dirty node on the path (including the root) with the lowest index
ArticulationBitField toUpdate = matrix.row[linkID].pathToRoot & state.dirty;
if(toUpdate)
{
ArticulationBitField ignoreNodes = (toUpdate & (0-toUpdate))-1;
ArticulationBitField path = matrix.row[linkID].pathToRoot & ~ignoreNodes, p = path;
ArticulationBitField newDirty = 0;
Cm::SpatialVector dV = Cm::SpatialVector::zero();
if(p & 1)
{
dV = getRootDeltaV(matrix, -deferredZ(state));
velocityRef(state, 0) += dV;
for(ArticulationBitField defer = matrix.row[0].children & ~path; defer; defer &= (defer-1))
deferredVelRef(state, ArticulationLowestSetBit(defer)) += dV;
deferredZRef(state) = Cm::SpatialVector::zero();
newDirty = matrix.row[0].children;
p--;
}
for(; p; p &= (p-1))
{
PxU32 i = ArticulationLowestSetBit(p);
dV = propagateVelocity(matrix.row[i], deferredSZ(state,i), dV + state.deferredVel[i], matrix.aux[i]);
velocityRef(state,i) += dV;
for(ArticulationBitField defer = matrix.row[i].children & ~path; defer; defer &= (defer-1))
deferredVelRef(state,ArticulationLowestSetBit(defer)) += dV;
newDirty |= matrix.row[i].children;
deferredVelRef(state,i) = Cm::SpatialVector::zero();
deferredSZRef(state,i) = PxVec3(0);
}
state.dirty = (state.dirty | newDirty)&~path;
}
#if DY_ARTICULATION_DEBUG_VERIFY
Cm::SpatialVector v = state.velocity[linkID];
Cm::SpatialVector rv = state.refVelocity[linkID];
PX_ASSERT((v-rv).magnitude()<1e-4f * rv.magnitude());
#endif
return state.velocity[linkID];
}
void PxcFsFlushVelocity(const FsData& matrix)
{
Cm::SpatialVector V = getRootDeltaV(matrix, -deferredZ(state));
deferredZRef(state) = Cm::SpatialVector::zero();
velocityRef(state,0) += V;
for(ArticulationBitField defer = matrix.row[0].children; defer; defer &= (defer-1))
deferredVelRef(state,ArticulationLowestSetBit(defer)) += V;
for(PxU32 i = 1; i<matrix.linkCount; i++)
{
Cm::SpatialVector V = propagateVelocity(matrix.row[i], deferredSZ(state,i), state.deferredVel[i], matrix.aux[i]);
deferredVelRef(state,i) = Cm::SpatialVector::zero();
deferredSZRef(state,i) = PxVec3(0);
velocityRef(state,i) += V;
for(ArticulationBitField defer = matrix.row[i].children; defer; defer &= (defer-1))
deferredVelRef(state,ArticulationLowestSetBit(defer)) += V;
}
state.dirty = 0;
}
void PxcFsPropagateDrivenInertiaScalar(FsData& matrix,
const FsInertia* baseInertia,
const PxReal* isf,
const Mat33V* load,
PxcFsScratchAllocator allocator)
{
typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
Cm::SpatialVectorV IS[3];
PxMat33 D;
FsRow* rows = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
FsInertia *inertia = allocator.alloc<FsInertia>(matrix.linkCount);
PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(FsInertia));
for(PxU32 i=matrix.linkCount; --i>0;)
{
FsRow& r = rows[i];
const FsRowAux& a = aux[i];
const FsJointVectors& jv = jointVectors[i];
Mat33V m = Fns::computeSIS(inertia[i], a.S, IS);
FloatV f = FLoad(isf[i]);
Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[i].col0, f, m.col0),
V3ScaleAdd(load[i].col1, f, m.col1),
V3ScaleAdd(load[i].col2, f, m.col2)));
r.D = D;
inertia[matrix.parent[i]] = Fns::addInertia(inertia[matrix.parent[i]],
Fns::translateInertia(jv.parentOffset, Fns::multiplySubtract(inertia[i], D, IS, r.DSI)));
}
getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]);
}
void PxcLtbSolve(const FsData& m,
Vec3V* c, // rhs error to solve for
Cm::SpatialVector* y) // velocity delta output
{
typedef ArticulationFnsScalar Fns;
PxVec4* b = reinterpret_cast<PxVec4*>(c);
const LtbRow* rows = getLtbRows(m);
PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVector));
for(PxU32 i=m.linkCount;i-->1;)
{
PxU32 p = m.parent[i];
const LtbRow& r = rows[i];
b[i] -= PxVec4(Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j1), y[i]),0);
y[p] -= Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j0), b[i].getXYZ());
}
y[0] = Fns::multiply(rows[0].inertia,y[0]);
for(PxU32 i=1; i<m.linkCount; i++)
{
PxU32 p = m.parent[i];
const LtbRow& r = rows[i];
PxVec3 t = Fns::multiply(r.jResponse, b[i].getXYZ()) - Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j0), y[p]);
y[i] = Fns::multiply(r.inertia, y[i]) - Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j1), t);
}
}
#endif
#if DY_ARTICULATION_DEBUG_VERIFY
void PxcLtbFactorScalar(FsData& m)
{
typedef ArticulationFnsScalar Fns;
LtbRow* rows = getLtbRows(m);
SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
for(PxU32 i=0;i<m.linkCount;i++)
inertia[i] = ArticulationFnsDebug::unsimdify(rows[i].inertia);
Cm::SpatialVector j[3];
for(PxU32 i=m.linkCount; --i>0;)
{
LtbRow& b = rows[i];
inertia[i] = Fns::invertInertia(inertia[i]);
PxU32 p = m.parent[i];
Cm::SpatialVector* j0 = &reinterpret_cast<Cm::SpatialVector&>(*b.j0),
* j1 = &reinterpret_cast<Cm::SpatialVector&>(*b.j1);
Fns::multiply(j, inertia[i], j1);
PxMat33 jResponse = Fns::invertSym33(-Fns::multiplySym(j, j1));
j1[0] = j[0]; j1[1] = j[1]; j1[2] = j[2];
b.jResponse = Mat33V_From_PxMat33(jResponse);
Fns::multiply(j, j0, jResponse);
inertia[p] = Fns::multiplySubtract(inertia[p], j, j0);
j0[0] = j[0]; j0[1] = j[1]; j0[2] = j[2];
}
rows[0].inertia = Fns::invertInertia(inertia[0]);
for(PxU32 i=1;i<m.linkCount;i++)
rows[i].inertia = inertia[i];
}
void PxcFsPropagateDrivenInertiaScalar(FsData& matrix,
const FsInertia* baseInertia,
const PxReal* isf,
const Mat33V* load)
{
typedef ArticulationFnsScalar Fns;
Cm::SpatialVector IS[3], DSI[3];
PxMat33 D;
FsRow* rows = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
for(PxU32 i=0;i<matrix.linkCount;i++)
inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]);
for(PxU32 i=matrix.linkCount; --i>0;)
{
FsRow& r = rows[i];
const FsRowAux& a = aux[i];
const FsJointVectors& jv = jointVectors[i];
Fns::multiply(IS, inertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*a.S));
PX_ALIGN(16, PxMat33) L;
PxMat33_From_Mat33V(load[i], L);
D = Fns::invertSym33(Fns::multiplySym(&reinterpret_cast<const Cm::SpatialVector&>(*a.S), IS) + L*isf[i]);
Fns::multiply(DSI, IS, D);
r.D = Mat33V_From_PxMat33(D);
reinterpret_cast<Cm::SpatialVector&>(r.DSI[0]) = DSI[0];
reinterpret_cast<Cm::SpatialVector&>(r.DSI[1]) = DSI[1];
reinterpret_cast<Cm::SpatialVector&>(r.DSI[2]) = DSI[2];
inertia[matrix.parent[i]] += Fns::translate(getParentOffset(jv), Fns::multiplySubtract(inertia[i], DSI, IS));
}
FsInertia& m = getRootInverseInertia(matrix);
m = FsInertia(Fns::invertInertia(inertia[0]));
}
// no need to compile this ecxcept for verification, and it consumes huge amounts of stack space
void PxcFsComputeJointLoadsScalar(const FsData& matrix,
const FsInertia*PX_RESTRICT baseInertia,
Mat33V*PX_RESTRICT load,
const PxReal*PX_RESTRICT isf,
PxU32 linkCount,
PxU32 maxIterations)
{
typedef ArticulationFnsScalar Fns;
// the childward S
SpInertia leafwardInertia[DY_ARTICULATION_MAX_SIZE];
SpInertia rootwardInertia[DY_ARTICULATION_MAX_SIZE];
SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
SpInertia contribToParent[DY_ARTICULATION_MAX_SIZE];
// total articulated inertia assuming the articulation is rooted here
const FsRow* row = getFsRows(matrix);
const FsRowAux* aux = getAux(matrix);
const FsJointVectors* jointVectors = getJointVectors(matrix);
PX_UNUSED(row);
PxMat33 load_[DY_ARTICULATION_MAX_SIZE];
for(PxU32 iter=0;iter<maxIterations;iter++)
{
for(PxU32 i=0;i<linkCount;i++)
inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]);
for(PxU32 i=linkCount;i-->1;)
{
const FsJointVectors& j = jointVectors[i];
leafwardInertia[i] = inertia[i];
contribToParent[i] = Fns::propagate(inertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]);
inertia[matrix.parent[i]] += Fns::translate((PxVec3&)j.parentOffset, contribToParent[i]);
}
for(PxU32 i=1;i<linkCount;i++)
{
rootwardInertia[i] = Fns::translate(-(PxVec3&)jointVectors[i].parentOffset, inertia[matrix.parent[i]]) - contribToParent[i];
inertia[i] += Fns::propagate(rootwardInertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]);
}
for(PxU32 i=1;i<linkCount;i++)
{
load_[i] = Fns::computeDriveInertia(leafwardInertia[i], rootwardInertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S));
PX_ASSERT(load_[i][0].isFinite() && load_[i][1].isFinite() && load_[2][i].isFinite());
}
}
for(PxU32 i=1;i<linkCount;i++)
load[i] = Mat33V_From_PxMat33(load_[i]);
}
#endif
}
}

View File

@ -0,0 +1,101 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_ARTICULATION_SCALAR_H
#define DY_ARTICULATION_SCALAR_H
// Scalar helpers for articulations
#include "foundation/PxUnionCast.h"
#include "DyArticulationUtils.h"
#include "DySpatial.h"
#include "PsFPU.h"
namespace physx
{
namespace Dy
{
PX_FORCE_INLINE Cm::SpatialVector& velocityRef(FsData &m, PxU32 i)
{
return reinterpret_cast<Cm::SpatialVector&>(getVelocity(m)[i]);
}
PX_FORCE_INLINE Cm::SpatialVector& deferredVelRef(FsData &m, PxU32 i)
{
return reinterpret_cast<Cm::SpatialVector&>(getDeferredVel(m)[i]);
}
PX_FORCE_INLINE PxVec3& deferredSZRef(FsData &m, PxU32 i)
{
return reinterpret_cast<PxVec3 &>(getDeferredSZ(m)[i]);
}
PX_FORCE_INLINE const PxVec3& deferredSZ(const FsData &s, PxU32 i)
{
return reinterpret_cast<const PxVec3 &>(getDeferredSZ(s)[i]);
}
PX_FORCE_INLINE Cm::SpatialVector& deferredZRef(FsData &s)
{
return unsimdRef(s.deferredZ);
}
PX_FORCE_INLINE const Cm::SpatialVector& deferredZ(const FsData &s)
{
return unsimdRef(s.deferredZ);
}
PX_FORCE_INLINE const PxVec3& getJointOffset(const FsJointVectors& j)
{
return reinterpret_cast<const PxVec3& >(j.jointOffset);
}
PX_FORCE_INLINE const PxVec3& getParentOffset(const FsJointVectors& j)
{
return reinterpret_cast<const PxVec3&>(j.parentOffset);
}
PX_FORCE_INLINE const Cm::SpatialVector* getDSI(const FsRow& row)
{
return PxUnionCast<const Cm::SpatialVector*,const Cm::SpatialVectorV*>(row.DSI); //reinterpret_cast<const Cm::SpatialVector*>(row.DSI);
}
}
}
#endif //DY_ARTICULATION_SCALAR_H

View File

@ -0,0 +1,336 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_ARTICULATION_H
#define DY_ARTICULATION_H
#include "PsVecMath.h"
#include "CmSpatialVector.h"
#include "DySpatial.h"
#include "PsBitUtils.h"
#include "DyArticulation.h"
#include "DyArticulationHelper.h"
namespace physx
{
namespace Dy
{
struct ArticulationCore;
struct ArticulationLink;
typedef size_t ArticulationLinkHandle;
class Articulation;
#define DY_ARTICULATION_DEBUG_VERIFY 0
PX_FORCE_INLINE PxU32 ArticulationLowestSetBit(ArticulationBitField val)
{
PxU32 low = PxU32(val&0xffffffff), high = PxU32(val>>32);
PxU32 mask = PxU32((!low)-1);
PxU32 result = (mask&Ps::lowestSetBitUnsafe(low)) | ((~mask)&(Ps::lowestSetBitUnsafe(high)+32));
PX_ASSERT(val & (PxU64(1)<<result));
PX_ASSERT(!(val & ((PxU64(1)<<result)-1)));
return result;
}
PX_FORCE_INLINE PxU32 ArticulationHighestSetBit(ArticulationBitField val)
{
PxU32 low = PxU32(val & 0xffffffff), high = PxU32(val >> 32);
PxU32 mask = PxU32((!high) - 1);
PxU32 result = ((~mask)&Ps::highestSetBitUnsafe(low)) | ((mask)&(Ps::highestSetBitUnsafe(high) + 32));
PX_ASSERT(val & (PxU64(1) << result));
return result;
}
using namespace Ps::aos;
PX_FORCE_INLINE Cm::SpatialVector& unsimdRef(Cm::SpatialVectorV& v) { return reinterpret_cast<Cm::SpatialVector&>(v); }
PX_FORCE_INLINE const Cm::SpatialVector& unsimdRef(const Cm::SpatialVectorV& v) { return reinterpret_cast<const Cm::SpatialVector&>(v); }
PX_ALIGN_PREFIX(16)
struct FsJointVectors
{
Vec3V parentOffset; // 16 bytes world-space offset from parent to child
Vec3V jointOffset; // 16 bytes world-space offset from child to joint
}
PX_ALIGN_SUFFIX(16);
PX_ALIGN_PREFIX(16)
struct FsRow
{
Cm::SpatialVectorV DSI[3]; // 96 bytes
Mat33V D; // 48 bytes
ArticulationBitField children; // 8 bytes bitmap of children
ArticulationBitField pathToRoot; // 8 bytes bitmap of nodes to root, including self and root
}
PX_ALIGN_SUFFIX(16);
PX_COMPILE_TIME_ASSERT(sizeof(FsRow)==160);
PX_ALIGN_PREFIX(16)
struct FsInertia
{
Mat33V ll, la, aa;
PX_FORCE_INLINE FsInertia(const Mat33V& _ll, const Mat33V& _la, const Mat33V& _aa): ll(_ll), la(_la), aa(_aa) {}
PX_FORCE_INLINE FsInertia(const SpInertia& I)
: ll(Mat33V_From_PxMat33(I.mLL)), la(Mat33V_From_PxMat33(I.mLA)), aa(Mat33V_From_PxMat33(I.mAA)) {}
PX_FORCE_INLINE FsInertia() {}
PX_FORCE_INLINE void operator=(const FsInertia& other)
{
ll.col0 = other.ll.col0; ll.col1 = other.ll.col1; ll.col2 = other.ll.col2;
la.col0 = other.la.col0; la.col1 = other.la.col1; la.col2 = other.la.col2;
aa.col0 = other.aa.col0; aa.col1 = other.aa.col1; aa.col2 = other.aa.col2;
}
PX_FORCE_INLINE FsInertia(const FsInertia& other)
{
ll.col0 = other.ll.col0; ll.col1 = other.ll.col1; ll.col2 = other.ll.col2;
la.col0 = other.la.col0; la.col1 = other.la.col1; la.col2 = other.la.col2;
aa.col0 = other.aa.col0; aa.col1 = other.aa.col1; aa.col2 = other.aa.col2;
}
}PX_ALIGN_SUFFIX(16);
PX_ALIGN_PREFIX(16)
struct LtbRow
{
FsInertia inertia; // body inertia in world space
Cm::SpatialVectorV j0[3], j1[3]; // jacobians
Mat33V jResponse; // inverse response matrix of joint
Vec3V jC;
} PX_ALIGN_SUFFIX(16);
PX_ALIGN_PREFIX(16)
struct FsRowAux
{
Cm::SpatialVectorV S[3]; // motion subspace
}PX_ALIGN_SUFFIX(16);
struct FsData
{
//Articulation* articulationX; //4
#if !PX_P64_FAMILY
PxU32 pad0; //8
#endif
PxU16 linkCount; // number of links //10
PxU16 jointVectorOffset; // offset of read-only data //12
// PxU16 maxSolverNormalProgress; //14
// PxU16 maxSolverFrictionProgress; //16
PxU64 dirty; //24
PxU16 ltbDataOffset; // offset of save-velocity data //26
PxU16 fsDataOffset; // offset of joint references //28
//PxU32 solverProgress; //32
Cm::SpatialVectorV deferredZ; //64
PxU8 parent[DY_ARTICULATION_MAX_SIZE]; //128
};
PX_COMPILE_TIME_ASSERT(0 == (sizeof(FsData) & 0x0f));
#define SOLVER_BODY_SOLVER_PROGRESS_OFFSET 28
#define SOLVER_BODY_MAX_SOLVER_PROGRESS_OFFSET 12
namespace
{
template<class T> PX_FORCE_INLINE T addAddr(void* addr, PxU32 increment)
{
return reinterpret_cast<T>(reinterpret_cast<char*>(addr)+increment);
}
template<class T> PX_FORCE_INLINE T addAddr(const void* addr, PxU32 increment)
{
return reinterpret_cast<T>(reinterpret_cast<const char*>(addr)+increment);
}
}
PX_FORCE_INLINE Cm::SpatialVectorV* getVelocity(FsData& matrix)
{
return addAddr<Cm::SpatialVectorV*>(&matrix, sizeof(FsData));
}
PX_FORCE_INLINE const Cm::SpatialVectorV* getVelocity(const FsData& matrix)
{
return addAddr<const Cm::SpatialVectorV*>(&matrix, sizeof(FsData));
}
PX_FORCE_INLINE Cm::SpatialVectorV* getDeferredVel(FsData& matrix)
{
return addAddr<Cm::SpatialVectorV*>(getVelocity(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount);
}
PX_FORCE_INLINE const Cm::SpatialVectorV* getDeferredVel(const FsData& matrix)
{
return addAddr<const Cm::SpatialVectorV*>(getVelocity(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount);
}
PX_FORCE_INLINE const Cm::SpatialVectorV* getMotionVector(const FsData& matrix)
{
return addAddr<const Cm::SpatialVectorV*>(getDeferredVel(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount);
}
PX_FORCE_INLINE Cm::SpatialVectorV* getMotionVector(FsData& matrix)
{
return addAddr<Cm::SpatialVectorV*>(getDeferredVel(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount);
}
PX_FORCE_INLINE Vec3V* getDeferredSZ(FsData& matrix)
{
return addAddr<Vec3V*>(getMotionVector(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount);
}
PX_FORCE_INLINE const Vec3V* getDeferredSZ(const FsData& matrix)
{
return addAddr<const Vec3V*>(getMotionVector(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount);
}
PX_FORCE_INLINE const PxReal* getMaxPenBias(const FsData& matrix)
{
return addAddr<const PxReal*>(getDeferredSZ(matrix), sizeof(Vec3V) * matrix.linkCount);
}
PX_FORCE_INLINE PxReal* getMaxPenBias(FsData& matrix)
{
return addAddr<PxReal*>(getDeferredSZ(matrix), sizeof(Vec3V) * matrix.linkCount);
}
PX_FORCE_INLINE FsJointVectors* getJointVectors(FsData& matrix)
{
return addAddr<FsJointVectors *>(&matrix,matrix.jointVectorOffset);
}
PX_FORCE_INLINE const FsJointVectors* getJointVectors(const FsData& matrix)
{
return addAddr<const FsJointVectors *>(&matrix,matrix.jointVectorOffset);
}
PX_FORCE_INLINE FsInertia& getRootInverseInertia(FsData& matrix)
{
return *addAddr<FsInertia*>(&matrix,matrix.fsDataOffset);
}
PX_FORCE_INLINE const FsInertia& getRootInverseInertia(const FsData& matrix)
{
return *addAddr<const FsInertia*>(&matrix,matrix.fsDataOffset);
}
PX_FORCE_INLINE FsRow* getFsRows(FsData& matrix)
{
return addAddr<FsRow*>(&getRootInverseInertia(matrix),sizeof(FsInertia));
}
PX_FORCE_INLINE const FsRow* getFsRows(const FsData& matrix)
{
return addAddr<const FsRow*>(&getRootInverseInertia(matrix),sizeof(FsInertia));
}
PX_FORCE_INLINE LtbRow* getLtbRows(FsData& matrix)
{
return addAddr<LtbRow*>(&matrix,matrix.ltbDataOffset);
}
PX_FORCE_INLINE const LtbRow* getLtbRows(const FsData& matrix)
{
return addAddr<const LtbRow*>(&matrix,matrix.ltbDataOffset);
}
PX_FORCE_INLINE Cm::SpatialVectorV* getRefVelocity(FsData& matrix)
{
return addAddr<Cm::SpatialVectorV*>(getLtbRows(matrix), sizeof(LtbRow)*matrix.linkCount);
}
PX_FORCE_INLINE const Cm::SpatialVectorV* getRefVelocity(const FsData& matrix)
{
return addAddr<const Cm::SpatialVectorV*>(getLtbRows(matrix), sizeof(LtbRow)*matrix.linkCount);
}
PX_FORCE_INLINE FsRowAux* getAux(FsData& matrix)
{
return addAddr<FsRowAux*>(getRefVelocity(matrix),sizeof(Cm::SpatialVectorV)*matrix.linkCount);
}
PX_FORCE_INLINE const FsRowAux* getAux(const FsData& matrix)
{
return addAddr<const FsRowAux*>(getRefVelocity(matrix),sizeof(Cm::SpatialVectorV)*matrix.linkCount);
}
//void PxcFsApplyImpulse(ArticulationV& articulation,
// PxU32 linkID,
// Vec3V linear,
// Vec3V angular);
//Cm::SpatialVectorV PxcFsGetVelocity(ArticulationV& articulation,
// PxU32 linkID);
Cm::SpatialVectorV PxcFsGetMotionVector(ArticulationV& articulation,
PxU32 linkID);
#if DY_ARTICULATION_DEBUG_VERIFY
namespace ArticulationRef
{
Cm::SpatialVector propagateVelocity(const FsRow& row,
const FsJointVectors& jv,
const PxVec3& SZ,
const Cm::SpatialVector& v,
const FsRowAux& aux);
Cm::SpatialVector propagateImpulse(const FsRow& row,
const FsJointVectors& jv,
PxVec3& SZ,
const Cm::SpatialVector& Z,
const FsRowAux& aux);
void applyImpulse(const FsData& matrix,
Cm::SpatialVector* velocity,
PxU32 linkID,
const Cm::SpatialVector& impulse);
}
#endif
}
}
#endif //DY_ARTICULATION_H

View File

@ -0,0 +1,404 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_BODYCORE_INTEGRATOR_H
#define DY_BODYCORE_INTEGRATOR_H
#include "CmPhysXCommon.h"
#include "PxvDynamics.h"
#include "PsMathUtils.h"
#include "PxsRigidBody.h"
#include "DySolverBody.h"
#include "DySleepingConfigulation.h"
#include "PxsIslandSim.h"
namespace physx
{
namespace Dy
{
PX_FORCE_INLINE void bodyCoreComputeUnconstrainedVelocity
(const PxVec3& gravity, const PxReal dt, const PxReal linearDamping, const PxReal angularDamping, const PxReal accelScale,
const PxReal maxLinearVelocitySq, const PxReal maxAngularVelocitySq, PxVec3& inOutLinearVelocity, PxVec3& inOutAngularVelocity,
bool disableGravity)
{
//Multiply everything that needs multiplied by dt to improve code generation.
PxVec3 linearVelocity = inOutLinearVelocity;
PxVec3 angularVelocity = inOutAngularVelocity;
const PxReal linearDampingTimesDT=linearDamping*dt;
const PxReal angularDampingTimesDT=angularDamping*dt;
const PxReal oneMinusLinearDampingTimesDT=1.0f-linearDampingTimesDT;
const PxReal oneMinusAngularDampingTimesDT=1.0f-angularDampingTimesDT;
//TODO context-global gravity
if (!disableGravity)
{
const PxVec3 linearAccelTimesDT = gravity*dt *accelScale;
linearVelocity += linearAccelTimesDT;
}
//Apply damping.
const PxReal linVelMultiplier = physx::intrinsics::fsel(oneMinusLinearDampingTimesDT, oneMinusLinearDampingTimesDT, 0.0f);
const PxReal angVelMultiplier = physx::intrinsics::fsel(oneMinusAngularDampingTimesDT, oneMinusAngularDampingTimesDT, 0.0f);
linearVelocity*=linVelMultiplier;
angularVelocity*=angVelMultiplier;
// Clamp velocity
const PxReal linVelSq = linearVelocity.magnitudeSquared();
if(linVelSq > maxLinearVelocitySq)
{
linearVelocity *= PxSqrt(maxLinearVelocitySq / linVelSq);
}
const PxReal angVelSq = angularVelocity.magnitudeSquared();
if(angVelSq > maxAngularVelocitySq)
{
angularVelocity *= PxSqrt(maxAngularVelocitySq / angVelSq);
}
inOutLinearVelocity = linearVelocity;
inOutAngularVelocity = angularVelocity;
}
PX_FORCE_INLINE void integrateCore(PxVec3& motionLinearVelocity, PxVec3& motionAngularVelocity, PxSolverBody& solverBody, PxSolverBodyData& solverBodyData, const PxF32 dt)
{
PxU32 lockFlags = solverBodyData.lockFlags;
if (lockFlags)
{
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_X)
{
motionLinearVelocity.x = 0.f;
solverBody.linearVelocity.x = 0.f;
}
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Y)
{
motionLinearVelocity.y = 0.f;
solverBody.linearVelocity.y = 0.f;
}
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Z)
{
motionLinearVelocity.z = 0.f;
solverBody.linearVelocity.z = 0.f;
}
//The angular velocity should be 0 because it is now impossible to make it rotate around that axis!
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_X)
{
motionAngularVelocity.x = 0.f;
solverBody.angularState.x = 0.f;
}
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Y)
{
motionAngularVelocity.y = 0.f;
solverBody.angularState.y = 0.f;
}
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Z)
{
motionAngularVelocity.z = 0.f;
solverBody.angularState.z = 0.f;
}
}
// Integrate linear part
PxVec3 linearMotionVel = solverBodyData.linearVelocity + motionLinearVelocity;
PxVec3 delta = linearMotionVel * dt;
PxVec3 angularMotionVel = solverBodyData.angularVelocity + solverBodyData.sqrtInvInertia * motionAngularVelocity;
PxReal w = angularMotionVel.magnitudeSquared();
solverBodyData.body2World.p += delta;
PX_ASSERT(solverBodyData.body2World.p.isFinite());
//Store back the linear and angular velocities
//core.linearVelocity += solverBody.linearVelocity * solverBodyData.sqrtInvMass;
solverBodyData.linearVelocity += solverBody.linearVelocity;
solverBodyData.angularVelocity += solverBodyData.sqrtInvInertia * solverBody.angularState;
// Integrate the rotation using closed form quaternion integrator
if (w != 0.0f)
{
w = PxSqrt(w);
// Perform a post-solver clamping
// TODO(dsequeira): ignore this for the moment
//just clamp motionVel to half float-range
const PxReal maxW = 1e+7f; //Should be about sqrt(PX_MAX_REAL/2) or smaller
if (w > maxW)
{
angularMotionVel = angularMotionVel.getNormalized() * maxW;
w = maxW;
}
const PxReal v = dt * w * 0.5f;
PxReal s, q;
Ps::sincos(v, s, q);
s /= w;
const PxVec3 pqr = angularMotionVel * s;
const PxQuat quatVel(pqr.x, pqr.y, pqr.z, 0);
PxQuat result = quatVel * solverBodyData.body2World.q;
result += solverBodyData.body2World.q * q;
solverBodyData.body2World.q = result.getNormalized();
PX_ASSERT(solverBodyData.body2World.q.isSane());
PX_ASSERT(solverBodyData.body2World.q.isFinite());
}
motionLinearVelocity = linearMotionVel;
motionAngularVelocity = angularMotionVel;
}
PX_FORCE_INLINE PxReal updateWakeCounter(PxsRigidBody* originalBody, PxReal dt, PxReal /*invDt*/, const bool enableStabilization, const bool useAdaptiveForce, Cm::SpatialVector& motionVelocity,
bool hasStaticTouch)
{
//KS - at most one of these features can be enabled at any time
PX_ASSERT(!useAdaptiveForce || !enableStabilization);
PxsBodyCore& bodyCore = originalBody->getCore();
// update the body's sleep state and
PxReal wakeCounterResetTime = 20.0f*0.02f;
PxReal wc = bodyCore.wakeCounter;
{
if (enableStabilization)
{
bool freeze = false;
const PxTransform& body2World = bodyCore.body2World;
// calculate normalized energy: kinetic energy divided by mass
const PxVec3 t = bodyCore.inverseInertia;
const PxVec3 inertia(t.x > 0.f ? 1.0f / t.x : 1.f, t.y > 0.f ? 1.0f / t.y : 1.f, t.z > 0.f ? 1.0f / t.z : 1.f);
PxVec3 sleepLinVelAcc = motionVelocity.linear;
PxVec3 sleepAngVelAcc = body2World.q.rotateInv(motionVelocity.angular);
// scale threshold by cluster factor (more contacts => higher sleep threshold)
//const PxReal clusterFactor = PxReal(1u + getNumUniqueInteractions());
PxReal invMass = bodyCore.inverseMass;
if (invMass == 0.f)
invMass = 1.f;
const PxReal angular = sleepAngVelAcc.multiply(sleepAngVelAcc).dot(inertia) * invMass;
const PxReal linear = sleepLinVelAcc.magnitudeSquared();
PxReal frameNormalizedEnergy = 0.5f * (angular + linear);
const PxReal cf = hasStaticTouch ? PxReal(PxMin(10u, bodyCore.numBodyInteractions)) : 0.f;
const PxReal freezeThresh = cf*bodyCore.freezeThreshold;
originalBody->freezeCount = PxMax(originalBody->freezeCount - dt, 0.0f);
bool settled = true;
PxReal accelScale = PxMin(1.f, originalBody->accelScale + dt);
if (frameNormalizedEnergy >= freezeThresh)
{
settled = false;
originalBody->freezeCount = PXD_FREEZE_INTERVAL;
}
if (!hasStaticTouch)
{
accelScale = 1.f;
settled = false;
}
if (settled)
{
//Dampen bodies that are just about to go to sleep
if (cf > 1.f)
{
const PxReal sleepDamping = PXD_SLEEP_DAMPING;
const PxReal sleepDampingTimesDT = sleepDamping*dt;
const PxReal d = 1.0f - sleepDampingTimesDT;
bodyCore.linearVelocity = bodyCore.linearVelocity * d;
bodyCore.angularVelocity = bodyCore.angularVelocity * d;
accelScale = accelScale * 0.75f + 0.25f*PXD_FREEZE_SCALE;
}
freeze = originalBody->freezeCount == 0.f && frameNormalizedEnergy < (bodyCore.freezeThreshold * PXD_FREEZE_TOLERANCE);
}
originalBody->accelScale = accelScale;
const PxU32 wasFrozen = originalBody->mInternalFlags & PxsRigidBody::eFROZEN;
PxU16 flags;
if(freeze)
{
//current flag isn't frozen but freeze flag raise so we need to raise the frozen flag in this frame
flags = PxU16(PxsRigidBody::eFROZEN);
if(!wasFrozen)
flags |= PxsRigidBody::eFREEZE_THIS_FRAME;
bodyCore.body2World = originalBody->getLastCCDTransform();
}
else
{
flags = 0;
if(wasFrozen)
flags |= PxsRigidBody::eUNFREEZE_THIS_FRAME;
}
originalBody->mInternalFlags = flags;
/*KS: New algorithm for sleeping when using stabilization:
* Energy *this frame* must be higher than sleep threshold and accumulated energy over previous frames
* must be higher than clusterFactor*energyThreshold.
*/
if (wc < wakeCounterResetTime * 0.5f || wc < dt)
{
//Accumulate energy
originalBody->sleepLinVelAcc += sleepLinVelAcc;
originalBody->sleepAngVelAcc += sleepAngVelAcc;
//If energy this frame is high
if (frameNormalizedEnergy >= bodyCore.sleepThreshold)
{
//Compute energy over sleep preparation time
const PxReal sleepAngular = originalBody->sleepAngVelAcc.multiply(originalBody->sleepAngVelAcc).dot(inertia) * invMass;
const PxReal sleepLinear = originalBody->sleepLinVelAcc.magnitudeSquared();
PxReal normalizedEnergy = 0.5f * (sleepAngular + sleepLinear);
const PxReal sleepClusterFactor = PxReal(1u + bodyCore.numCountedInteractions);
// scale threshold by cluster factor (more contacts => higher sleep threshold)
const PxReal threshold = sleepClusterFactor*bodyCore.sleepThreshold;
//If energy over sleep preparation time is high
if (normalizedEnergy >= threshold)
{
//Wake up
//PX_ASSERT(isActive());
originalBody->sleepAngVelAcc = PxVec3(0);
originalBody->sleepLinVelAcc = PxVec3(0);
const float factor = bodyCore.sleepThreshold == 0.f ? 2.0f : PxMin(normalizedEnergy / threshold, 2.0f);
PxReal oldWc = wc;
wc = factor * 0.5f * wakeCounterResetTime + dt * (sleepClusterFactor - 1.0f);
bodyCore.solverWakeCounter = wc;
//if (oldWc == 0.0f) // for the case where a sleeping body got activated by the system (not the user) AND got processed by the solver as well
// notifyNotReadyForSleeping(bodyCore.nodeIndex);
if (oldWc == 0.0f)
originalBody->mInternalFlags |= PxsRigidBody::eACTIVATE_THIS_FRAME;
return wc;
}
}
}
}
else
{
if (useAdaptiveForce)
{
if (hasStaticTouch && bodyCore.numBodyInteractions > 1)
originalBody->accelScale = 1.f / PxReal(bodyCore.numBodyInteractions);
else
originalBody->accelScale = 1.f;
}
if (wc < wakeCounterResetTime * 0.5f || wc < dt)
{
const PxTransform& body2World = bodyCore.body2World;
// calculate normalized energy: kinetic energy divided by mass
const PxVec3 t = bodyCore.inverseInertia;
const PxVec3 inertia(t.x > 0.f ? 1.0f / t.x : 1.f, t.y > 0.f ? 1.0f / t.y : 1.f, t.z > 0.f ? 1.0f / t.z : 1.f);
PxVec3 sleepLinVelAcc = motionVelocity.linear;
PxVec3 sleepAngVelAcc = body2World.q.rotateInv(motionVelocity.angular);
originalBody->sleepLinVelAcc += sleepLinVelAcc;
originalBody->sleepAngVelAcc += sleepAngVelAcc;
PxReal invMass = bodyCore.inverseMass;
if (invMass == 0.f)
invMass = 1.f;
const PxReal angular = originalBody->sleepAngVelAcc.multiply(originalBody->sleepAngVelAcc).dot(inertia) * invMass;
const PxReal linear = originalBody->sleepLinVelAcc.magnitudeSquared();
PxReal normalizedEnergy = 0.5f * (angular + linear);
// scale threshold by cluster factor (more contacts => higher sleep threshold)
const PxReal clusterFactor = PxReal(1 + bodyCore.numCountedInteractions);
const PxReal threshold = clusterFactor*bodyCore.sleepThreshold;
if (normalizedEnergy >= threshold)
{
//PX_ASSERT(isActive());
originalBody->sleepLinVelAcc = PxVec3(0);
originalBody->sleepAngVelAcc = PxVec3(0);
const float factor = threshold == 0.f ? 2.0f : PxMin(normalizedEnergy / threshold, 2.0f);
PxReal oldWc = wc;
wc = factor * 0.5f * wakeCounterResetTime + dt * (clusterFactor - 1.0f);
bodyCore.solverWakeCounter = wc;
PxU16 flags = 0;
if (oldWc == 0.0f) // for the case where a sleeping body got activated by the system (not the user) AND got processed by the solver as well
{
flags |= PxsRigidBody::eACTIVATE_THIS_FRAME;
//notifyNotReadyForSleeping(bodyCore.nodeIndex);
}
originalBody->mInternalFlags = flags;
return wc;
}
}
}
}
wc = PxMax(wc - dt, 0.0f);
bodyCore.solverWakeCounter = wc;
return wc;
}
PX_FORCE_INLINE void sleepCheck(PxsRigidBody* originalBody, const PxReal dt, const PxReal intDt, const bool enableStabilization, bool useAdaptiveForce, Cm::SpatialVector& motionVelocity,
bool hasStaticTouch)
{
PxReal wc = updateWakeCounter(originalBody, dt, intDt, enableStabilization, useAdaptiveForce, motionVelocity, hasStaticTouch);
bool wakeCounterZero = (wc == 0.0f);
if (wakeCounterZero)
{
//PxsBodyCore& bodyCore = originalBody->getCore();
originalBody->mInternalFlags |= PxsRigidBody::eDEACTIVATE_THIS_FRAME;
// notifyReadyForSleeping(bodyCore.nodeIndex);
originalBody->sleepLinVelAcc = PxVec3(0);
originalBody->sleepAngVelAcc = PxVec3(0);
}
}
}
}
#endif //DY_BODYCORE_INTEGRATOR_H

View File

@ -0,0 +1,926 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
#include "DyConstraintPartition.h"
#include "DyArticulationUtils.h"
#define INTERLEAVE_SELF_CONSTRAINTS 1
namespace physx
{
namespace Dy
{
namespace
{
PX_FORCE_INLINE PxU32 getArticulationIndex(const uintptr_t eaArticulation, const uintptr_t* eaArticulations, const PxU32 numEas)
{
PxU32 index=0xffffffff;
for(PxU32 i=0;i<numEas;i++)
{
if(eaArticulations[i]== eaArticulation)
{
index=i;
break;
}
}
PX_ASSERT(index!=0xffffffff);
return index;
}
#define MAX_NUM_PARTITIONS 32
class RigidBodyClassification
{
PxU8* PX_RESTRICT mBodies;
PxU32 mBodySize;
PxU32 mBodyStride;
PxU32 mBodyCount;
public:
RigidBodyClassification(PxU8* PX_RESTRICT bodies, PxU32 bodyCount, PxU32 bodyStride) : mBodies(bodies), mBodySize(bodyCount*bodyStride), mBodyStride(bodyStride),
mBodyCount(bodyCount)
{
}
//Returns true if it is a dynamic-dynamic constriant; false if it is a dynamic-static or dynamic-kinematic constraint
PX_FORCE_INLINE bool classifyConstraint(const PxSolverConstraintDesc& desc, uintptr_t& indexA, uintptr_t& indexB,
bool& activeA, bool& activeB, PxU32& bodyAProgress, PxU32& bodyBProgress) const
{
indexA=uintptr_t(reinterpret_cast<PxU8*>(desc.bodyA) - mBodies)/mBodyStride;
indexB=uintptr_t(reinterpret_cast<PxU8*>(desc.bodyB) - mBodies)/mBodyStride;
activeA = indexA < mBodyCount;
activeB = indexB < mBodyCount;
bodyAProgress = desc.bodyA->solverProgress;
bodyBProgress = desc.bodyB->solverProgress;
return activeA && activeB;
}
PX_FORCE_INLINE void getProgress(const PxSolverConstraintDesc& desc,
PxU32& bodyAProgress, PxU32& bodyBProgress)
{
bodyAProgress = desc.bodyA->solverProgress;
bodyBProgress = desc.bodyB->solverProgress;
}
PX_FORCE_INLINE void storeProgress(const PxSolverConstraintDesc& desc, const PxU32 bodyAProgress, const PxU32 bodyBProgress,
const PxU16 availablePartition)
{
desc.bodyA->solverProgress = bodyAProgress;
desc.bodyA->maxSolverNormalProgress = PxMax(desc.bodyA->maxSolverNormalProgress, availablePartition);
desc.bodyB->solverProgress = bodyBProgress;
desc.bodyB->maxSolverNormalProgress = PxMax(desc.bodyB->maxSolverNormalProgress, availablePartition);
}
PX_FORCE_INLINE void storeProgress(const PxSolverConstraintDesc& desc, const PxU32 bodyAProgress, const PxU32 bodyBProgress)
{
desc.bodyA->solverProgress = bodyAProgress;
desc.bodyB->solverProgress = bodyBProgress;
}
PX_FORCE_INLINE PxU32 getStaticContactWriteIndex(const PxSolverConstraintDesc& desc,
bool activeA, bool activeB)
{
if (activeA)
return PxU32(desc.bodyA->maxSolverNormalProgress + desc.bodyA->maxSolverFrictionProgress++);
else if (activeB)
return PxU32(desc.bodyB->maxSolverNormalProgress + desc.bodyB->maxSolverFrictionProgress++);
return 0xffffffff;
}
PX_FORCE_INLINE void recordStaticConstraint(const PxSolverConstraintDesc& desc, bool& activeA, bool& activeB) const
{
if (activeA)
{
desc.bodyA->maxSolverFrictionProgress++;
}
if (activeB)
{
desc.bodyB->maxSolverFrictionProgress++;
}
}
PX_FORCE_INLINE void clearState()
{
for(PxU32 a = 0; a < mBodySize; a+= mBodyStride)
reinterpret_cast<PxSolverBody*>(mBodies+a)->solverProgress = 0;
}
PX_FORCE_INLINE void reserveSpaceForStaticConstraints(Ps::Array<PxU32>& numConstraintsPerPartition)
{
for(PxU32 a = 0; a < mBodySize; a += mBodyStride)
{
PxSolverBody& body = *reinterpret_cast<PxSolverBody*>(mBodies+a);
body.solverProgress = 0;
PxU32 requiredSize = PxU32(body.maxSolverNormalProgress + body.maxSolverFrictionProgress);
if(requiredSize > numConstraintsPerPartition.size())
{
numConstraintsPerPartition.resize(requiredSize);
}
for(PxU32 b = 0; b < body.maxSolverFrictionProgress; ++b)
{
numConstraintsPerPartition[body.maxSolverNormalProgress + b]++;
}
}
}
};
class ExtendedRigidBodyClassification
{
PxU8* PX_RESTRICT mBodies;
PxU32 mBodyCount;
PxU32 mBodySize;
PxU32 mStride;
//uintptr_t* PX_RESTRICT mFsDatas;
uintptr_t* PX_RESTRICT mArticulations;
PxU32 mNumArticulations;
public:
ExtendedRigidBodyClassification(PxU8* PX_RESTRICT bodies, PxU32 numBodies, PxU32 stride, uintptr_t* articulations, PxU32 numArticulations)
: mBodies(bodies), mBodyCount(numBodies), mBodySize(numBodies*stride), mStride(stride), mArticulations(articulations), mNumArticulations(numArticulations)
{
}
//Returns true if it is a dynamic-dynamic constriant; false if it is a dynamic-static or dynamic-kinematic constraint
PX_FORCE_INLINE bool classifyConstraint(const PxSolverConstraintDesc& desc, uintptr_t& indexA, uintptr_t& indexB,
bool& activeA, bool& activeB, PxU32& bodyAProgress, PxU32& bodyBProgress) const
{
bool hasStatic = false;
if(PxSolverConstraintDesc::NO_LINK == desc.linkIndexA)
{
indexA=uintptr_t(reinterpret_cast<PxU8*>(desc.bodyA) - mBodies)/mStride;
activeA = indexA < mBodyCount;
hasStatic = !activeA;//desc.bodyADataIndex == 0;
bodyAProgress = activeA ? desc.bodyA->solverProgress: 0;
}
else
{
ArticulationV* articulationA = desc.articulationA;
indexA=mBodyCount+getArticulationIndex(uintptr_t(articulationA), mArticulations ,mNumArticulations);
//bodyAProgress = articulationA->getFsDataPtr()->solverProgress;
bodyAProgress = articulationA->solverProgress;
activeA = true;
}
if(PxSolverConstraintDesc::NO_LINK == desc.linkIndexB)
{
indexB=uintptr_t(reinterpret_cast<PxU8*>(desc.bodyB) - mBodies)/mStride;
activeB = indexB < mBodyCount;
hasStatic = hasStatic || !activeB;
bodyBProgress = activeB ? desc.bodyB->solverProgress : 0;
}
else
{
ArticulationV* articulationB = desc.articulationB;
indexB=mBodyCount+getArticulationIndex(uintptr_t(articulationB), mArticulations, mNumArticulations);
activeB = true;
bodyBProgress = articulationB->solverProgress;
}
return !hasStatic;
}
PX_FORCE_INLINE void getProgress(const PxSolverConstraintDesc& desc,
PxU32& bodyAProgress, PxU32& bodyBProgress) const
{
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexA)
{
bodyAProgress = desc.bodyA->solverProgress;
}
else
{
ArticulationV* articulationA = desc.articulationA;
bodyAProgress = articulationA->solverProgress;
}
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexB)
{
bodyBProgress = desc.bodyB->solverProgress;
}
else
{
ArticulationV* articulationB = desc.articulationB;
bodyBProgress = articulationB->solverProgress;
}
}
PX_FORCE_INLINE void storeProgress(const PxSolverConstraintDesc& desc, const PxU32 bodyAProgress, const PxU32 bodyBProgress,
const PxU16 availablePartition)
{
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexA)
{
desc.bodyA->solverProgress = bodyAProgress;
desc.bodyA->maxSolverNormalProgress = PxMax(desc.bodyA->maxSolverNormalProgress, availablePartition);
}
else
{
ArticulationV* articulationA = desc.articulationA;
articulationA->solverProgress = bodyAProgress;
articulationA->maxSolverNormalProgress = PxMax(articulationA->maxSolverNormalProgress, availablePartition);
}
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexB)
{
desc.bodyB->solverProgress = bodyBProgress;
desc.bodyB->maxSolverNormalProgress = PxMax(desc.bodyB->maxSolverNormalProgress, availablePartition);
}
else
{
ArticulationV* articulationB = desc.articulationB;
articulationB->solverProgress = bodyBProgress;
articulationB->maxSolverNormalProgress = PxMax(articulationB->maxSolverNormalProgress, availablePartition);
}
}
PX_FORCE_INLINE void storeProgress(const PxSolverConstraintDesc& desc, const PxU32 bodyAProgress,
const PxU32 bodyBProgress)
{
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexA)
{
desc.bodyA->solverProgress = bodyAProgress;
}
else
{
ArticulationV* articulationA = desc.articulationA;
articulationA->solverProgress = bodyAProgress;
}
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexB)
{
desc.bodyB->solverProgress = bodyBProgress;
}
else
{
ArticulationV* articulationB = desc.articulationB;
articulationB->solverProgress = bodyBProgress;
}
}
PX_FORCE_INLINE void recordStaticConstraint(const PxSolverConstraintDesc& desc, bool& activeA, bool& activeB)
{
if (activeA)
{
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexA)
desc.bodyA->maxSolverFrictionProgress++;
else
{
ArticulationV* articulationA = desc.articulationA;
if(!articulationA->willStoreStaticConstraint())
articulationA->maxSolverFrictionProgress++;
}
}
if (activeB)
{
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexB)
desc.bodyB->maxSolverFrictionProgress++;
else
{
ArticulationV* articulationB = desc.articulationB;
if (!articulationB->willStoreStaticConstraint())
articulationB->maxSolverFrictionProgress++;
}
}
}
PX_FORCE_INLINE PxU32 getStaticContactWriteIndex(const PxSolverConstraintDesc& desc,
bool activeA, bool activeB)
{
if (activeA)
{
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexA)
{
return PxU32(desc.bodyA->maxSolverNormalProgress + desc.bodyA->maxSolverFrictionProgress++);
}
else
{
ArticulationV* articulationA = desc.articulationA;
//Attempt to store static constraints on the articulation (only supported with the reduced coordinate articulations).
//This acts as an optimization
if(articulationA->storeStaticConstraint(desc))
return 0xffffffff;
return PxU32(articulationA->maxSolverNormalProgress + articulationA->maxSolverFrictionProgress++);
}
}
else if (activeB)
{
if (PxSolverConstraintDesc::NO_LINK == desc.linkIndexB)
{
return PxU32(desc.bodyB->maxSolverNormalProgress + desc.bodyB->maxSolverFrictionProgress++);
}
else
{
ArticulationV* articulationB = desc.articulationB;
//Attempt to store static constraints on the articulation (only supported with the reduced coordinate articulations).
//This acts as an optimization
if (articulationB->storeStaticConstraint(desc))
return 0xffffffff;
return PxU32(articulationB->maxSolverNormalProgress + articulationB->maxSolverFrictionProgress++);
}
}
return 0xffffffff;
}
PX_FORCE_INLINE void clearState()
{
for(PxU32 a = 0; a < mBodySize; a+= mStride)
reinterpret_cast<PxSolverBody*>(mBodies+a)->solverProgress = 0;
for(PxU32 a = 0; a < mNumArticulations; ++a)
(reinterpret_cast<ArticulationV*>(mArticulations[a]))->solverProgress = 0;
}
PX_FORCE_INLINE void reserveSpaceForStaticConstraints(Ps::Array<PxU32>& numConstraintsPerPartition)
{
for(PxU32 a = 0; a < mBodySize; a+= mStride)
{
PxSolverBody& body = *reinterpret_cast<PxSolverBody*>(mBodies+a);
body.solverProgress = 0;
PxU32 requiredSize = PxU32(body.maxSolverNormalProgress + body.maxSolverFrictionProgress);
if(requiredSize > numConstraintsPerPartition.size())
{
numConstraintsPerPartition.resize(requiredSize);
}
for(PxU32 b = 0; b < body.maxSolverFrictionProgress; ++b)
{
numConstraintsPerPartition[body.maxSolverNormalProgress + b]++;
}
}
for(PxU32 a = 0; a < mNumArticulations; ++a)
{
ArticulationV* articulation = reinterpret_cast<ArticulationV*>(mArticulations[a]);
articulation->solverProgress = 0;
PxU32 requiredSize = PxU32(articulation->maxSolverNormalProgress + articulation->maxSolverFrictionProgress);
if(requiredSize > numConstraintsPerPartition.size())
{
numConstraintsPerPartition.resize(requiredSize);
}
for(PxU32 b = 0; b < articulation->maxSolverFrictionProgress; ++b)
{
numConstraintsPerPartition[articulation->maxSolverNormalProgress + b]++;
}
}
}
};
template <typename Classification>
void classifyConstraintDesc(const PxSolverConstraintDesc* PX_RESTRICT descs, const PxU32 numConstraints, Classification& classification,
Ps::Array<PxU32>& numConstraintsPerPartition, PxSolverConstraintDesc* PX_RESTRICT eaTempConstraintDescriptors)
{
const PxSolverConstraintDesc* _desc = descs;
const PxU32 numConstraintsMin1 = numConstraints - 1;
PxU32 numUnpartitionedConstraints = 0;
numConstraintsPerPartition.forceSize_Unsafe(32);
PxMemZero(numConstraintsPerPartition.begin(), sizeof(PxU32) * 32);
for(PxU32 i = 0; i < numConstraints; ++i, _desc++)
{
const PxU32 prefetchOffset = PxMin(numConstraintsMin1 - i, 4u);
Ps::prefetchLine(_desc[prefetchOffset].constraint);
Ps::prefetchLine(_desc[prefetchOffset].bodyA);
Ps::prefetchLine(_desc[prefetchOffset].bodyB);
Ps::prefetchLine(_desc + 8);
uintptr_t indexA, indexB;
bool activeA, activeB;
PxU32 partitionsA, partitionsB;
const bool notContainsStatic = classification.classifyConstraint(*_desc, indexA, indexB, activeA, activeB,
partitionsA, partitionsB);
if(notContainsStatic)
{
PxU32 availablePartition;
{
const PxU32 combinedMask = (~partitionsA & ~partitionsB);
availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask);
if(availablePartition == MAX_NUM_PARTITIONS)
{
eaTempConstraintDescriptors[numUnpartitionedConstraints++] = *_desc;
continue;
}
const PxU32 partitionBit = (1u << availablePartition);
if (activeA)
partitionsA |= partitionBit;
if(activeB)
partitionsB |= partitionBit;
}
numConstraintsPerPartition[availablePartition]++;
availablePartition++;
classification.storeProgress(*_desc, partitionsA, partitionsB, PxU16(availablePartition));
}
else
{
classification.recordStaticConstraint(*_desc, activeA, activeB);
}
}
PxU32 partitionStartIndex = 0;
while(numUnpartitionedConstraints > 0)
{
classification.clearState();
partitionStartIndex += 32;
//Keep partitioning the un-partitioned constraints and blat the whole thing to 0!
numConstraintsPerPartition.resize(32 + numConstraintsPerPartition.size());
PxMemZero(numConstraintsPerPartition.begin() + partitionStartIndex, sizeof(PxU32) * 32);
PxU32 newNumUnpartitionedConstraints = 0;
PxU32 partitionsA, partitionsB;
bool activeA, activeB;
uintptr_t indexA, indexB;
for(PxU32 i = 0; i < numUnpartitionedConstraints; ++i)
{
const PxSolverConstraintDesc& desc = eaTempConstraintDescriptors[i];
classification.classifyConstraint(desc, indexA, indexB, activeA, activeB,
partitionsA, partitionsB);
PxU32 availablePartition;
{
const PxU32 combinedMask = (~partitionsA & ~partitionsB);
availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask);
if(availablePartition == MAX_NUM_PARTITIONS)
{
//Need to shuffle around unpartitioned constraints...
eaTempConstraintDescriptors[newNumUnpartitionedConstraints++] = desc;
continue;
}
const PxU32 partitionBit = (1u << availablePartition);
if(activeA)
partitionsA |= partitionBit;
if(activeB)
partitionsB |= partitionBit;
}
/*desc.bodyA->solverProgress = partitionsA;
desc.bodyB->solverProgress = partitionsB;*/
availablePartition += partitionStartIndex;
numConstraintsPerPartition[availablePartition]++;
availablePartition++;
classification.storeProgress(desc, partitionsA, partitionsB, PxU16(availablePartition) );
/* desc.bodyA->maxSolverNormalProgress = PxMax(desc.bodyA->maxSolverNormalProgress, PxU16(availablePartition));
desc.bodyB->maxSolverNormalProgress = PxMax(desc.bodyB->maxSolverNormalProgress, PxU16(availablePartition));*/
}
numUnpartitionedConstraints = newNumUnpartitionedConstraints;
}
classification.reserveSpaceForStaticConstraints(numConstraintsPerPartition);
}
template <typename Classification>
PxU32 writeConstraintDesc(const PxSolverConstraintDesc* PX_RESTRICT descs, const PxU32 numConstraints, Classification& classification,
Ps::Array<PxU32>& accumulatedConstraintsPerPartition, PxSolverConstraintDesc* eaTempConstraintDescriptors,
PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDesc)
{
PX_UNUSED(eaTempConstraintDescriptors);
const PxSolverConstraintDesc* _desc = descs;
const PxU32 numConstraintsMin1 = numConstraints - 1;
PxU32 numUnpartitionedConstraints = 0;
PxU32 numStaticConstraints = 0;
for(PxU32 i = 0; i < numConstraints; ++i, _desc++)
{
const PxU32 prefetchOffset = PxMin(numConstraintsMin1 - i, 4u);
Ps::prefetchLine(_desc[prefetchOffset].constraint);
Ps::prefetchLine(_desc[prefetchOffset].bodyA);
Ps::prefetchLine(_desc[prefetchOffset].bodyB);
Ps::prefetchLine(_desc + 8);
uintptr_t indexA, indexB;
bool activeA, activeB;
PxU32 partitionsA, partitionsB;
const bool notContainsStatic = classification.classifyConstraint(*_desc, indexA, indexB, activeA, activeB, partitionsA, partitionsB);
if(notContainsStatic)
{
PxU32 availablePartition;
{
const PxU32 combinedMask = (~partitionsA & ~partitionsB);
availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask);
if(availablePartition == MAX_NUM_PARTITIONS)
{
eaTempConstraintDescriptors[numUnpartitionedConstraints++] = *_desc;
continue;
}
const PxU32 partitionBit = (1u << availablePartition);
if(activeA)
partitionsA |= partitionBit;
if(activeB)
partitionsB |= partitionBit;
}
classification.storeProgress(*_desc, partitionsA, partitionsB, PxU16(availablePartition + 1));
eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[availablePartition]++] = *_desc;
}
else
{
//Just count the number of static constraints and store in maxSolverFrictionProgress...
PxU32 index = classification.getStaticContactWriteIndex(*_desc, activeA, activeB);
if (index != 0xffffffff)
{
eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[index]++] = *_desc;
}
else
numStaticConstraints++;
}
}
PxU32 partitionStartIndex = 0;
while(numUnpartitionedConstraints > 0)
{
classification.clearState();
partitionStartIndex += 32;
PxU32 newNumUnpartitionedConstraints = 0;
PxU32 partitionsA, partitionsB;
bool activeA, activeB;
uintptr_t indexA, indexB;
for(PxU32 i = 0; i < numUnpartitionedConstraints; ++i)
{
const PxSolverConstraintDesc& desc = eaTempConstraintDescriptors[i];
/* PxU32 partitionsA=desc.bodyA->solverProgress;
PxU32 partitionsB=desc.bodyB->solverProgress;*/
classification.classifyConstraint(desc, indexA, indexB,
activeA, activeB, partitionsA, partitionsB);
PxU32 availablePartition;
{
const PxU32 combinedMask = (~partitionsA & ~partitionsB);
availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask);
if(availablePartition == MAX_NUM_PARTITIONS)
{
//Need to shuffle around unpartitioned constraints...
eaTempConstraintDescriptors[newNumUnpartitionedConstraints++] = desc;
continue;
}
const PxU32 partitionBit = (1u << availablePartition);
if(activeA)
partitionsA |= partitionBit;
if(activeB)
partitionsB |= partitionBit;
}
/*desc.bodyA->solverProgress = partitionsA;
desc.bodyB->solverProgress = partitionsB;
*/
classification.storeProgress(desc, partitionsA, partitionsB);
availablePartition += partitionStartIndex;
eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[availablePartition]++] = desc;
}
numUnpartitionedConstraints = newNumUnpartitionedConstraints;
}
return numStaticConstraints;
}
}
#define PX_NORMALIZE_PARTITIONS 1
#if PX_NORMALIZE_PARTITIONS
template<typename Classification>
PxU32 normalizePartitions(Ps::Array<PxU32>& accumulatedConstraintsPerPartition, PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDescriptors,
const PxU32 numConstraintDescriptors, Ps::Array<PxU32>& bitField, const Classification& classification, const PxU32 numBodies, const PxU32 numArticulations)
{
PxU32 numPartitions = 0;
PxU32 prevAccumulation = 0;
for(; numPartitions < accumulatedConstraintsPerPartition.size() && accumulatedConstraintsPerPartition[numPartitions] > prevAccumulation;
prevAccumulation = accumulatedConstraintsPerPartition[numPartitions++]);
PxU32 targetSize = (numPartitions == 0 ? 0 : (numConstraintDescriptors)/numPartitions);
bitField.reserve((numBodies + numArticulations + 31)/32);
bitField.forceSize_Unsafe((numBodies + numArticulations + 31)/32);
for(PxU32 i = numPartitions; i > 0; i--)
{
PxU32 partitionIndex = i-1;
//Build the partition mask...
PxU32 startIndex = partitionIndex == 0 ? 0 : accumulatedConstraintsPerPartition[partitionIndex-1];
PxU32 endIndex = accumulatedConstraintsPerPartition[partitionIndex];
//If its greater than target size, there's nothing that will be pulled into it from earlier partitions
if((endIndex - startIndex) >= targetSize)
continue;
PxMemZero(bitField.begin(), sizeof(PxU32)*bitField.size());
for(PxU32 a = startIndex; a < endIndex; ++a)
{
PxSolverConstraintDesc& desc = eaOrderedConstraintDescriptors[a];
uintptr_t indexA, indexB;
bool activeA, activeB;
PxU32 partitionsA, partitionsB;
classification.classifyConstraint(desc, indexA, indexB, activeA, activeB, partitionsA, partitionsB);
if (activeA)
bitField[PxU32(indexA) / 32] |= (1u << (indexA & 31));
if(activeB)
bitField[PxU32(indexB)/32] |= (1u << (indexB & 31));
}
bool bTerm = false;
for(PxU32 a = partitionIndex; a > 0 && !bTerm; --a)
{
PxU32 pInd = a-1;
PxU32 si = pInd == 0 ? 0 : accumulatedConstraintsPerPartition[pInd-1];
PxU32 ei = accumulatedConstraintsPerPartition[pInd];
for(PxU32 b = ei; b > si && !bTerm; --b)
{
PxU32 ind = b-1;
PxSolverConstraintDesc& desc = eaOrderedConstraintDescriptors[ind];
uintptr_t indexA, indexB;
bool activeA, activeB;
PxU32 partitionsA, partitionsB;
classification.classifyConstraint(desc, indexA, indexB, activeA, activeB, partitionsA, partitionsB);
bool canAdd = true;
if(activeA && (bitField[PxU32(indexA)/32] & (1u << (indexA & 31))))
canAdd = false;
if(activeB && (bitField[PxU32(indexB)/32] & (1u << (indexB & 31))))
canAdd = false;
if(canAdd)
{
PxSolverConstraintDesc tmp = eaOrderedConstraintDescriptors[ind];
if(activeA)
bitField[PxU32(indexA)/32] |= (1u << (indexA & 31));
if(activeB)
bitField[PxU32(indexB)/32] |= (1u << (indexB & 31));
PxU32 index = ind;
for(PxU32 c = pInd; c < partitionIndex; ++c)
{
PxU32 newIndex = --accumulatedConstraintsPerPartition[c];
if(index != newIndex)
eaOrderedConstraintDescriptors[index] = eaOrderedConstraintDescriptors[newIndex];
index = newIndex;
}
if(index != ind)
eaOrderedConstraintDescriptors[index] = tmp;
if((accumulatedConstraintsPerPartition[partitionIndex] - accumulatedConstraintsPerPartition[partitionIndex-1]) >= targetSize)
{
bTerm = true;
break;
}
}
}
}
}
PxU32 partitionCount = 0;
PxU32 lastPartitionCount = 0;
for (PxU32 a = 0; a < numPartitions; ++a)
{
const PxU32 constraintCount = accumulatedConstraintsPerPartition[a];
accumulatedConstraintsPerPartition[partitionCount] = constraintCount;
if (constraintCount != lastPartitionCount)
{
lastPartitionCount = constraintCount;
partitionCount++;
}
}
accumulatedConstraintsPerPartition.forceSize_Unsafe(partitionCount);
return partitionCount;
}
#endif
PxU32 partitionContactConstraints(ConstraintPartitionArgs& args)
{
PxU32 maxPartition = 0;
//Unpack the input data.
const PxU32 numBodies=args.mNumBodies;
const PxU32 numArticulations=args.mNumArticulationPtrs;
const PxU32 numConstraintDescriptors=args.mNumContactConstraintDescriptors;
PxSolverConstraintDesc* PX_RESTRICT eaConstraintDescriptors=args.mContactConstraintDescriptors;
PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDescriptors=args.mOrderedContactConstraintDescriptors;
PxSolverConstraintDesc* PX_RESTRICT eaTempConstraintDescriptors=args.mTempContactConstraintDescriptors;
Ps::Array<PxU32>& constraintsPerPartition = *args.mConstraintsPerPartition;
constraintsPerPartition.forceSize_Unsafe(0);
const PxU32 stride = args.mStride;
for(PxU32 a = 0, offset = 0; a < numBodies; ++a, offset += stride)
{
PxSolverBody& body = *reinterpret_cast<PxSolverBody*>(args.mBodies + offset);
body.solverProgress = 0;
//We re-use maxSolverFrictionProgress and maxSolverNormalProgress to record the
//maximum partition used by dynamic constraints and the number of static constraints affecting
//a body. We use this to make partitioning much cheaper and be able to support
body.maxSolverFrictionProgress = 0;
body.maxSolverNormalProgress = 0;
}
PxU32 numOrderedConstraints=0;
PxU32 numStaticConstraints = 0;
if(numArticulations == 0)
{
RigidBodyClassification classification(args.mBodies, numBodies, stride);
classifyConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
eaTempConstraintDescriptors);
PxU32 accumulation = 0;
for(PxU32 a = 0; a < constraintsPerPartition.size(); ++a)
{
PxU32 count = constraintsPerPartition[a];
constraintsPerPartition[a] = accumulation;
accumulation += count;
}
for(PxU32 a = 0, offset = 0; a < numBodies; ++a, offset += stride)
{
PxSolverBody& body = *reinterpret_cast<PxSolverBody*>(args.mBodies + offset);
Ps::prefetchLine(&args.mBodies[a], 256);
body.solverProgress = 0;
//Keep the dynamic constraint count but bump the static constraint count back to 0.
//This allows us to place the static constraints in the appropriate place when we see them
//because we know the maximum index for the dynamic constraints...
body.maxSolverFrictionProgress = 0;
}
writeConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
eaTempConstraintDescriptors, eaOrderedConstraintDescriptors);
numOrderedConstraints = numConstraintDescriptors;
/*if(!args.enhancedDeterminism)
maxPartition = normalizePartitions(constraintsPerPartition, eaOrderedConstraintDescriptors, numConstraintDescriptors, *args.mBitField,
classification, numBodies, 0);*/
}
else
{
const ArticulationSolverDesc* articulationDescs=args.mArticulationPtrs;
PX_ALLOCA(_eaArticulations, uintptr_t, numArticulations);
uintptr_t* eaArticulations = _eaArticulations;
for(PxU32 i=0;i<numArticulations;i++)
{
ArticulationV* articulation = articulationDescs[i].articulation;
eaArticulations[i]=uintptr_t(articulation);
articulation->solverProgress = 0;
articulation->maxSolverFrictionProgress = 0;
articulation->maxSolverNormalProgress = 0;
}
ExtendedRigidBodyClassification classification(args.mBodies, numBodies, stride, eaArticulations, numArticulations);
classifyConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification,
constraintsPerPartition, eaTempConstraintDescriptors);
PxU32 accumulation = 0;
for(PxU32 a = 0; a < constraintsPerPartition.size(); ++a)
{
PxU32 count = constraintsPerPartition[a];
constraintsPerPartition[a] = accumulation;
accumulation += count;
}
for(PxU32 a = 0, offset = 0; a < numBodies; ++a, offset += stride)
{
PxSolverBody& body = *reinterpret_cast<PxSolverBody*>(args.mBodies+offset);
body.solverProgress = 0;
//Keep the dynamic constraint count but bump the static constraint count back to 0.
//This allows us to place the static constraints in the appropriate place when we see them
//because we know the maximum index for the dynamic constraints...
body.maxSolverFrictionProgress = 0;
}
for(PxU32 a = 0; a < numArticulations; ++a)
{
ArticulationV* articulation = reinterpret_cast<ArticulationV*>(eaArticulations[a]);
articulation->solverProgress = 0;
articulation->maxSolverFrictionProgress = 0;
}
numStaticConstraints = writeConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
eaTempConstraintDescriptors, eaOrderedConstraintDescriptors);
numOrderedConstraints = numConstraintDescriptors - numStaticConstraints;
/*if (!args.enhancedDeterminism)
maxPartition = normalizePartitions(constraintsPerPartition, eaOrderedConstraintDescriptors,
numOrderedConstraints, *args.mBitField, classification, numBodies, numArticulations);*/
}
const PxU32 numConstraintsDifferentBodies=numOrderedConstraints;
//PX_ASSERT(numConstraintsDifferentBodies == numConstraintDescriptors);
//Now handle the articulated self-constraints.
PxU32 totalConstraintCount = numConstraintsDifferentBodies;
args.mNumDifferentBodyConstraints=numConstraintsDifferentBodies;
args.mNumSelfConstraints=totalConstraintCount-numConstraintsDifferentBodies;
args.mNumStaticConstraints = numStaticConstraints;
//if (args.enhancedDeterminism)
{
PxU32 prevPartitionSize = 0;
maxPartition = 0;
for (PxU32 a = 0; a < constraintsPerPartition.size(); ++a, maxPartition++)
{
if (constraintsPerPartition[a] == prevPartitionSize)
break;
prevPartitionSize = constraintsPerPartition[a];
}
}
return maxPartition;
}
}
}

View File

@ -0,0 +1,75 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_CONSTRAINTPARTITION_H
#define DY_CONSTRAINTPARTITION_H
#include "DyDynamics.h"
namespace physx
{
namespace Dy
{
struct ConstraintPartitionArgs
{
//Input
PxU8* mBodies;
PxU32 mNumBodies;
PxU32 mStride;
ArticulationSolverDesc* mArticulationPtrs;
PxU32 mNumArticulationPtrs;
PxSolverConstraintDesc* mContactConstraintDescriptors;
PxU32 mNumContactConstraintDescriptors;
//output
PxSolverConstraintDesc* mOrderedContactConstraintDescriptors;
PxSolverConstraintDesc* mTempContactConstraintDescriptors;
PxU32 mNumDifferentBodyConstraints;
PxU32 mNumSelfConstraints;
PxU32 mNumStaticConstraints;
Ps::Array<PxU32>* mConstraintsPerPartition;
Ps::Array<PxU32>* mBitField;
bool enhancedDeterminism;
};
PxU32 partitionContactConstraints(ConstraintPartitionArgs& args);
} // namespace physx
}
#endif // DY_CONSTRAINTPARTITION_H

View File

@ -0,0 +1,102 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_CONSTRAINTSHADER_H
#define DY_CONSTRAINTSHADER_H
#include "DyConstraint.h"
#include "DySolverConstraintDesc.h"
#include "PsArray.h"
#define DY_ARTICULATION_MIN_RESPONSE 1e-5f
#define DY_ARTICULATION_CFM 1e-4f
#define DY_ARTICULATION_BAD_RESPONSE 0.02f
namespace physx
{
class PxcConstraintBlockStream;
class PxsConstraintBlockManager;
struct PxSolverBody;
struct PxSolverBodyData;
struct PxSolverConstraintDesc;
namespace Cm
{
struct SpatialVectorF;
}
namespace Dy
{
static const PxU32 MAX_CONSTRAINT_ROWS = 12;
struct SolverConstraintShaderPrepDesc
{
const Constraint* constraint;
PxConstraintSolverPrep solverPrep;
const void* constantBlock;
PxU32 constantBlockByteSize;
};
SolverConstraintPrepState::Enum setupSolverConstraint4
(SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
const PxReal dt, const PxReal recipdt, PxU32& totalRows,
PxConstraintAllocator& allocator);
SolverConstraintPrepState::Enum setupSolverConstraint4
(PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
const PxReal dt, const PxReal recipdt, PxU32& totalRows,
PxConstraintAllocator& allocator, PxU32 maxRows);
PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc,
PxSolverConstraintPrepDesc& prepDesc,
PxConstraintAllocator& allocator,
PxReal dt, PxReal invdt, Cm::SpatialVectorF* Z);
class ConstraintHelper
{
public:
static PxU32 setupSolverConstraint(
PxSolverConstraintPrepDesc& prepDesc,
PxConstraintAllocator& allocator,
PxReal dt, PxReal invdt, Cm::SpatialVectorF* Z);
};
}
}
#endif //DY_CONSTRAINTSHADER_H

View File

@ -0,0 +1,599 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxMemory.h"
#include "DyConstraintPrep.h"
#include "PxsRigidBody.h"
#include "DySolverConstraint1D.h"
#include "PsSort.h"
#include "DySolverConstraintDesc.h"
#include "PxcConstraintBlockStream.h"
#include "DyArticulationContactPrep.h"
#include "PsFoundation.h"
namespace physx
{
namespace Dy
{
// dsequeira:
//
// we can choose any linear combination of equality constraints and get the same solution
// Hence we can orthogonalize the constraints using the inner product given by the
// inverse mass matrix, so that when we use PGS, solving a constraint row for a joint
// don't disturb the solution of prior rows.
//
// We also eliminate the equality constraints from the hard inequality constraints -
// (essentially projecting the direction corresponding to the lagrange multiplier
// onto the equality constraint subspace) but 'til I've verified this generates
// exactly the same KKT/complementarity conditions, status is 'experimental'.
//
// since for equality constraints the resulting rows have the property that applying
// an impulse along one row doesn't alter the projected velocity along another row,
// all equality constraints (plus one inequality constraint) can be processed in parallel
// using SIMD
//
// Eliminating the inequality constraints from each other would require a solver change
// and not give us any more parallelism, although we might get better convergence.
namespace
{
PX_FORCE_INLINE Vec3V V3FromV4(Vec4V x) { return Vec3V_From_Vec4V(x); }
PX_FORCE_INLINE Vec3V V3FromV4Unsafe(Vec4V x) { return Vec3V_From_Vec4V_WUndefined(x); }
PX_FORCE_INLINE Vec4V V4FromV3(Vec3V x) { return Vec4V_From_Vec3V(x); }
//PX_FORCE_INLINE Vec4V V4ClearW(Vec4V x) { return V4SetW(x, FZero()); }
struct MassProps
{
FloatV invMass0;
FloatV invMass1;
FloatV invInertiaScale0;
FloatV invInertiaScale1;
PX_FORCE_INLINE MassProps(const PxReal imass0, const PxReal imass1, const PxConstraintInvMassScale& ims)
:
invMass0(FLoad(imass0 * ims.linear0))
, invMass1(FLoad(imass1 * ims.linear1))
, invInertiaScale0(FLoad(ims.angular0))
, invInertiaScale1(FLoad(ims.angular1))
{}
};
PX_FORCE_INLINE PxReal innerProduct(const Px1DConstraint& row0, Px1DConstraint& row1,
PxVec4& row0AngSqrtInvInertia0, PxVec4& row0AngSqrtInvInertia1,
PxVec4& row1AngSqrtInvInertia0, PxVec4& row1AngSqrtInvInertia1, const MassProps& m)
{
const Vec3V l0 = V3Mul(V3Scale(V3LoadA(row0.linear0), m.invMass0), V3LoadA(row1.linear0));
const Vec3V l1 = V3Mul(V3Scale(V3LoadA(row0.linear1), m.invMass1), V3LoadA(row1.linear1));
Vec4V r0ang0 = V4LoadA(&row0AngSqrtInvInertia0.x);
Vec4V r1ang0 = V4LoadA(&row1AngSqrtInvInertia0.x);
Vec4V r0ang1 = V4LoadA(&row0AngSqrtInvInertia1.x);
Vec4V r1ang1 = V4LoadA(&row1AngSqrtInvInertia1.x);
const Vec3V i0 = V3ScaleAdd(V3Mul(Vec3V_From_Vec4V(r0ang0), Vec3V_From_Vec4V(r1ang0)), m.invInertiaScale0, l0);
const Vec3V i1 = V3ScaleAdd(V3MulAdd(Vec3V_From_Vec4V(r0ang1), Vec3V_From_Vec4V(r1ang1), i0), m.invInertiaScale1, l1);
PxF32 f;
FStore(V3SumElems(i1), &f);
return f;
}
// indexed rotation around axis, with sine and cosine of half-angle
PX_FORCE_INLINE PxQuat indexedRotation(PxU32 axis, PxReal s, PxReal c)
{
PxQuat q(0,0,0,c);
reinterpret_cast<PxReal*>(&q)[axis] = s;
return q;
}
PxQuat diagonalize(const PxMat33& m) // jacobi rotation using quaternions
{
const PxU32 MAX_ITERS = 5;
PxQuat q = PxQuat(PxIdentity);
PxMat33 d;
for(PxU32 i=0; i < MAX_ITERS;i++)
{
const PxMat33 axes(q);
d = axes.getTranspose() * m * axes;
const PxReal d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]);
const PxU32 a = PxU32(d0 > d1 && d0 > d2 ? 0 : d1 > d2 ? 1 : 2); // rotation axis index, from largest off-diagonal element
const PxU32 a1 = Ps::getNextIndex3(a), a2 = Ps::getNextIndex3(a1);
if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1]-d[a2][a2]) > 2e6f*PxAbs(2.0f*d[a1][a2]))
break;
const PxReal w = (d[a1][a1]-d[a2][a2]) / (2.0f*d[a1][a2]); // cot(2 * phi), where phi is the rotation angle
const PxReal absw = PxAbs(w);
PxQuat r;
if(absw>1000)
r = indexedRotation(a, 1.0f/(4.0f*w), 1.f); // h will be very close to 1, so use small angle approx instead
else
{
const PxReal t = 1 / (absw + PxSqrt(w*w+1)); // absolute value of tan phi
const PxReal h = 1 / PxSqrt(t*t+1); // absolute value of cos phi
PX_ASSERT(h!=1); // |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8)
r = indexedRotation(a, PxSqrt((1-h)/2) * PxSign(w), PxSqrt((1+h)/2));
}
q = (q*r).getNormalized();
}
return q;
}
PX_FORCE_INLINE void rescale(const Mat33V& m, PxVec3& a0, PxVec3& a1, PxVec3& a2)
{
const Vec3V va0 = V3LoadU(a0);
const Vec3V va1 = V3LoadU(a1);
const Vec3V va2 = V3LoadU(a2);
const Vec3V b0 = V3ScaleAdd(va0, V3GetX(m.col0), V3ScaleAdd(va1, V3GetY(m.col0), V3Scale(va2, V3GetZ(m.col0))));
const Vec3V b1 = V3ScaleAdd(va0, V3GetX(m.col1), V3ScaleAdd(va1, V3GetY(m.col1), V3Scale(va2, V3GetZ(m.col1))));
const Vec3V b2 = V3ScaleAdd(va0, V3GetX(m.col2), V3ScaleAdd(va1, V3GetY(m.col2), V3Scale(va2, V3GetZ(m.col2))));
V3StoreU(b0, a0);
V3StoreU(b1, a1);
V3StoreU(b2, a2);
}
PX_FORCE_INLINE void rescale4(const Mat33V& m, PxReal* a0, PxReal* a1, PxReal* a2)
{
const Vec4V va0 = V4LoadA(a0);
const Vec4V va1 = V4LoadA(a1);
const Vec4V va2 = V4LoadA(a2);
const Vec4V b0 = V4ScaleAdd(va0, V3GetX(m.col0), V4ScaleAdd(va1, V3GetY(m.col0), V4Scale(va2, V3GetZ(m.col0))));
const Vec4V b1 = V4ScaleAdd(va0, V3GetX(m.col1), V4ScaleAdd(va1, V3GetY(m.col1), V4Scale(va2, V3GetZ(m.col1))));
const Vec4V b2 = V4ScaleAdd(va0, V3GetX(m.col2), V4ScaleAdd(va1, V3GetY(m.col2), V4Scale(va2, V3GetZ(m.col2))));
V4StoreA(b0, a0);
V4StoreA(b1, a1);
V4StoreA(b2, a2);
}
void diagonalize(Px1DConstraint** row,
PxVec4* angSqrtInvInertia0,
PxVec4* angSqrtInvInertia1,
const MassProps &m)
{
PxReal a00 = innerProduct(*row[0], *row[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], m);
PxReal a01 = innerProduct(*row[0], *row[1], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m);
PxReal a02 = innerProduct(*row[0], *row[2], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
PxReal a11 = innerProduct(*row[1], *row[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m);
PxReal a12 = innerProduct(*row[1], *row[2], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
PxReal a22 = innerProduct(*row[2], *row[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
PxMat33 a(PxVec3(a00, a01, a02),
PxVec3(a01, a11, a12),
PxVec3(a02, a12, a22));
PxQuat q = diagonalize(a);
PxMat33 n(-q);
Mat33V mn(V3LoadU(n.column0), V3LoadU(n.column1), V3LoadU(n.column2));
//KS - We treat as a Vec4V so that we get geometricError rescaled for free along with linear0
rescale4(mn, &row[0]->linear0.x, &row[1]->linear0.x, &row[2]->linear0.x);
rescale(mn, row[0]->linear1, row[1]->linear1, row[2]->linear1);
//KS - We treat as a PxVec4 so that we get velocityTarget rescaled for free
rescale4(mn, &row[0]->angular0.x, &row[1]->angular0.x, &row[2]->angular0.x);
rescale(mn, row[0]->angular1, row[1]->angular1, row[2]->angular1);
rescale4(mn, &angSqrtInvInertia0[0].x, &angSqrtInvInertia0[1].x, &angSqrtInvInertia0[2].x);
rescale4(mn, &angSqrtInvInertia1[0].x, &angSqrtInvInertia1[1].x, &angSqrtInvInertia1[2].x);
}
void orthogonalize(Px1DConstraint** row,
PxVec4* angSqrtInvInertia0,
PxVec4* angSqrtInvInertia1,
PxU32 rowCount,
PxU32 eqRowCount,
const MassProps &m)
{
PX_ASSERT(eqRowCount<=6);
const FloatV zero = FZero();
Vec3V lin1m[6], ang1m[6], lin1[6], ang1[6];
Vec4V lin0m[6], ang0m[6]; // must have 0 in the W-field
Vec4V lin0AndG[6], ang0AndT[6];
for(PxU32 i=0;i<rowCount;i++)
{
Vec4V l0AndG = V4LoadA(&row[i]->linear0.x); // linear0 and geometric error
Vec4V a0AndT = V4LoadA(&row[i]->angular0.x); // angular0 and velocity target
Vec3V l1 = V3FromV4(V4LoadA(&row[i]->linear1.x));
Vec3V a1 = V3FromV4(V4LoadA(&row[i]->angular1.x));
Vec4V angSqrtL0 = V4LoadA(&angSqrtInvInertia0[i].x);
Vec4V angSqrtL1 = V4LoadA(&angSqrtInvInertia1[i].x);
PxU32 eliminationRows = PxMin<PxU32>(i, eqRowCount);
for(PxU32 j=0;j<eliminationRows;j++)
{
const Vec3V s0 = V3MulAdd(l1, lin1m[j], V3FromV4Unsafe(V4Mul(l0AndG, lin0m[j])));
const Vec3V s1 = V3MulAdd(V3FromV4Unsafe(angSqrtL1), ang1m[j], V3FromV4Unsafe(V4Mul(angSqrtL0, ang0m[j])));
FloatV t = V3SumElems(V3Add(s0, s1));
l0AndG = V4NegScaleSub(lin0AndG[j], t, l0AndG);
a0AndT = V4NegScaleSub(ang0AndT[j], t, a0AndT);
l1 = V3NegScaleSub(lin1[j], t, l1);
a1 = V3NegScaleSub(ang1[j], t, a1);
angSqrtL0 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia0[j].x), t, angSqrtL0);
angSqrtL1 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia1[j].x), t, angSqrtL1);
}
V4StoreA(l0AndG, &row[i]->linear0.x);
V4StoreA(a0AndT, &row[i]->angular0.x);
V3StoreA(l1, row[i]->linear1);
V3StoreA(a1, row[i]->angular1);
V4StoreA(angSqrtL0, &angSqrtInvInertia0[i].x);
V4StoreA(angSqrtL1, &angSqrtInvInertia1[i].x);
if(i<eqRowCount)
{
lin0AndG[i] = l0AndG;
ang0AndT[i] = a0AndT;
lin1[i] = l1;
ang1[i] = a1;
const Vec3V l0 = V3FromV4(l0AndG);
const Vec3V l0m = V3Scale(l0, m.invMass0);
const Vec3V l1m = V3Scale(l1, m.invMass1);
const Vec4V a0m = V4Scale(angSqrtL0, m.invInertiaScale0);
const Vec4V a1m = V4Scale(angSqrtL1, m.invInertiaScale1);
const Vec3V s0 = V3MulAdd(l0, l0m, V3Mul(l1, l1m));
const Vec4V s1 = V4MulAdd(a0m, angSqrtL0, V4Mul(a1m, angSqrtL1));
const FloatV s = V3SumElems(V3Add(s0, V3FromV4Unsafe(s1)));
const FloatV a = FSel(FIsGrtr(s, zero), FRecip(s), zero); // with mass scaling, it's possible for the inner product of a row to be zero
lin0m[i] = V4Scale(V4ClearW(V4FromV3(l0m)), a);
ang0m[i] = V4Scale(V4ClearW(a0m), a);
lin1m[i] = V3Scale(l1m, a);
ang1m[i] = V3Scale(V3FromV4Unsafe(a1m), a);
}
}
}
}
void preprocessRows(Px1DConstraint** sorted,
Px1DConstraint* rows,
PxVec4* angSqrtInvInertia0,
PxVec4* angSqrtInvInertia1,
PxU32 rowCount,
const PxMat33& sqrtInvInertia0F32,
const PxMat33& sqrtInvInertia1F32,
const PxReal invMass0,
const PxReal invMass1,
const PxConstraintInvMassScale& ims,
bool disablePreprocessing,
bool diagonalizeDrive,
bool preprocessLinear)
{
// j is maxed at 12, typically around 7, so insertion sort is fine
for(PxU32 i=0; i<rowCount; i++)
{
Px1DConstraint* r = rows+i;
PxU32 j = i;
for(;j>0 && r->solveHint < sorted[j-1]->solveHint; j--)
sorted[j] = sorted[j-1];
sorted[j] = r;
}
for(PxU32 i=0;i<rowCount-1;i++)
PX_ASSERT(sorted[i]->solveHint <= sorted[i+1]->solveHint);
for (PxU32 i = 0; i<rowCount; i++)
rows[i].forInternalUse = rows[i].flags & Px1DConstraintFlag::eKEEPBIAS ? rows[i].geometricError : 0;
const Mat33V sqrtInvInertia0 = Mat33V(V3LoadU(sqrtInvInertia0F32.column0), V3LoadU(sqrtInvInertia0F32.column1),
V3LoadU(sqrtInvInertia0F32.column2));
const Mat33V sqrtInvInertia1 = Mat33V(V3LoadU(sqrtInvInertia1F32.column0), V3LoadU(sqrtInvInertia1F32.column1),
V3LoadU(sqrtInvInertia1F32.column2));
PX_ASSERT(((uintptr_t(angSqrtInvInertia0)) & 0xF) == 0);
PX_ASSERT(((uintptr_t(angSqrtInvInertia1)) & 0xF) == 0);
for(PxU32 i = 0; i < rowCount; ++i)
{
const Vec3V angDelta0 = M33MulV3(sqrtInvInertia0, V3LoadU(sorted[i]->angular0));
const Vec3V angDelta1 = M33MulV3(sqrtInvInertia1, V3LoadU(sorted[i]->angular1));
V4StoreA(Vec4V_From_Vec3V(angDelta0), &angSqrtInvInertia0[i].x);
V4StoreA(Vec4V_From_Vec3V(angDelta1), &angSqrtInvInertia1[i].x);
}
if(disablePreprocessing)
return;
MassProps m(invMass0, invMass1, ims);
for(PxU32 i=0;i<rowCount;)
{
const PxU32 groupMajorId = PxU32(sorted[i]->solveHint>>8), start = i++;
while(i<rowCount && PxU32(sorted[i]->solveHint>>8) == groupMajorId)
i++;
if(groupMajorId == 4 || (groupMajorId == 8 && preprocessLinear))
{
PxU32 bCount = start; // count of bilateral constraints
for(; bCount<i && (sorted[bCount]->solveHint&255)==0; bCount++)
;
orthogonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, i-start, bCount-start, m);
}
if(groupMajorId == 1 && diagonalizeDrive)
{
PxU32 slerp = start; // count of bilateral constraints
for(; slerp<i && (sorted[slerp]->solveHint&255)!=2; slerp++)
;
if(slerp+3 == i)
diagonalize(sorted+slerp, angSqrtInvInertia0+slerp, angSqrtInvInertia1+slerp, m);
PX_ASSERT(i-start==3);
diagonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, m);
}
}
}
PxU32 ConstraintHelper::setupSolverConstraint(
PxSolverConstraintPrepDesc& prepDesc,
PxConstraintAllocator& allocator,
PxReal dt, PxReal invdt,
Cm::SpatialVectorF* Z)
{
if (prepDesc.numRows == 0)
{
prepDesc.desc->constraint = NULL;
prepDesc.desc->writeBack = NULL;
prepDesc.desc->constraintLengthOver16 = 0;
prepDesc.desc->writeBackLengthOver4 = 0;
return 0;
}
PxSolverConstraintDesc& desc = *prepDesc.desc;
bool isExtended = desc.linkIndexA != PxSolverConstraintDesc::NO_LINK
|| desc.linkIndexB != PxSolverConstraintDesc::NO_LINK;
PxU32 stride = isExtended ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D);
const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + stride * prepDesc.numRows;
//KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we
//know SPU DMAs have completed)
PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u);
if(NULL == ptr || (reinterpret_cast<PxU8*>(-1))==ptr)
{
if(NULL==ptr)
{
PX_WARN_ONCE(
"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
"Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks.");
return 0;
}
else
{
PX_WARN_ONCE(
"Attempting to allocate more than 16K of constraint data. "
"Either accept joints detaching/exploding or simplify constraints.");
ptr=NULL;
return 0;
}
}
desc.constraint = ptr;
setConstraintLength(desc,constraintLength);
desc.writeBack = prepDesc.writeback;
setWritebackLength(desc, sizeof(ConstraintWriteback));
memset(desc.constraint, 0, constraintLength);
SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
PxU8* constraints = desc.constraint + sizeof(SolverConstraint1DHeader);
init(*header, Ps::to8(prepDesc.numRows), isExtended, prepDesc.invMassScales);
header->body0WorldOffset = prepDesc.body0WorldOffset;
header->linBreakImpulse = prepDesc.linBreakForce * dt;
header->angBreakImpulse = prepDesc.angBreakForce * dt;
header->breakable = PxU8((prepDesc.linBreakForce != PX_MAX_F32) || (prepDesc.angBreakForce != PX_MAX_F32));
header->invMass0D0 = prepDesc.data0->invMass * prepDesc.invMassScales.linear0;
header->invMass1D1 = prepDesc.data1->invMass * prepDesc.invMassScales.linear1;
PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS];
PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS];
Px1DConstraint* sorted[MAX_CONSTRAINT_ROWS];
preprocessRows(sorted, prepDesc.rows, angSqrtInvInertia0, angSqrtInvInertia1, prepDesc.numRows,
prepDesc.data0->sqrtInvInertia, prepDesc.data1->sqrtInvInertia, prepDesc.data0->invMass, prepDesc.data1->invMass,
prepDesc.invMassScales, isExtended || prepDesc.disablePreprocessing, prepDesc.improvedSlerp, true);
const PxReal erp = 1.0f;
for (PxU32 i = 0; i<prepDesc.numRows; i++)
{
Ps::prefetchLine(constraints, 128);
SolverConstraint1D &s = *reinterpret_cast<SolverConstraint1D *>(constraints);
Px1DConstraint& c = *sorted[i];
PxReal driveScale = c.flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && prepDesc.driveLimitsAreForces ? PxMin(dt, 1.0f) : 1.0f;
PxReal unitResponse;
PxReal normalVel = 0.0f;
PxReal initVel = 0.f;
PxReal minResponseThreshold = prepDesc.minResponseThreshold;
if(!isExtended)
{
init(s, c.linear0, c.linear1, PxVec3(angSqrtInvInertia0[i].x, angSqrtInvInertia0[i].y, angSqrtInvInertia0[i].z),
PxVec3(angSqrtInvInertia1[i].x, angSqrtInvInertia1[i].y, angSqrtInvInertia1[i].z), c.minImpulse * driveScale, c.maxImpulse * driveScale);
s.ang0Writeback = c.angular0;
PxReal resp0 = s.lin0.magnitudeSquared() * prepDesc.data0->invMass * prepDesc.invMassScales.linear0 + s.ang0.magnitudeSquared() * prepDesc.invMassScales.angular0;
PxReal resp1 = s.lin1.magnitudeSquared() * prepDesc.data1->invMass * prepDesc.invMassScales.linear1 + s.ang1.magnitudeSquared() * prepDesc.invMassScales.angular1;
unitResponse = resp0 + resp1;
initVel = normalVel = prepDesc.data0->projectVelocity(c.linear0, c.angular0) - prepDesc.data1->projectVelocity(c.linear1, c.angular1);
}
else
{
init(s, c.linear0, c.linear1, c.angular0, c.angular1, c.minImpulse * driveScale, c.maxImpulse * driveScale);
SolverConstraint1DExt& e = static_cast<SolverConstraint1DExt&>(s);
const SolverExtBody eb0(reinterpret_cast<const void*>(prepDesc.body0), prepDesc.data0, desc.linkIndexA);
const SolverExtBody eb1(reinterpret_cast<const void*>(prepDesc.body1), prepDesc.data1, desc.linkIndexB);
const Cm::SpatialVector resp0 = createImpulseResponseVector(e.lin0, e.ang0, eb0);
const Cm::SpatialVector resp1 = createImpulseResponseVector(-e.lin1, -e.ang1, eb1);
unitResponse = getImpulseResponse(eb0, resp0, unsimdRef(e.deltaVA), prepDesc.invMassScales.linear0, prepDesc.invMassScales.angular0,
eb1, resp1, unsimdRef(e.deltaVB), prepDesc.invMassScales.linear1, prepDesc.invMassScales.angular1, Z, false);
//Add CFM term!
/*if(unitResponse > DY_ARTICULATION_MIN_RESPONSE)
unitResponse += DY_ARTICULATION_CFM;*/
s.ang0Writeback = c.angular0;
s.lin0 = resp0.linear;
s.ang0 = resp0.angular;
s.lin1 = -resp1.linear;
s.ang1 = -resp1.angular;
PxReal vel0, vel1;
if(needsNormalVel(c) || eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK || eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
{
vel0 = eb0.projectVelocity(c.linear0, c.angular0);
vel1 = eb1.projectVelocity(c.linear1, c.angular1);
normalVel = vel0 - vel1;
//normalVel = eb0.projectVelocity(s.lin0, s.ang0) - eb1.projectVelocity(s.lin1, s.ang1);
if(eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
initVel = vel0;
else if(eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
initVel = -vel1;
}
minResponseThreshold = PxMax(minResponseThreshold, DY_ARTICULATION_MIN_RESPONSE);
}
setSolverConstants(s.constant, s.unbiasedConstant, s.velMultiplier, s.impulseMultiplier,
c, normalVel, unitResponse, minResponseThreshold, erp, dt, invdt);
//s.targetVelocity = initVel;
const PxReal velBias = initVel * s.velMultiplier;
s.constant += velBias;
s.unbiasedConstant += velBias;
if(c.flags & Px1DConstraintFlag::eOUTPUT_FORCE)
s.flags |= DY_SC_FLAG_OUTPUT_FORCE;
constraints += stride;
}
//KS - Set the solve count at the end to 0
*(reinterpret_cast<PxU32*>(constraints)) = 0;
*(reinterpret_cast<PxU32*>(constraints + 4)) = 0;
PX_ASSERT(desc.constraint + getConstraintLength(desc) == constraints);
return prepDesc.numRows;
}
PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc,
PxSolverConstraintPrepDesc& prepDesc,
PxConstraintAllocator& allocator,
PxReal dt, PxReal invdt, Cm::SpatialVectorF* Z)
{
// LL shouldn't see broken constraints
PX_ASSERT(!(reinterpret_cast<ConstraintWriteback*>(prepDesc.writeback)->broken));
setConstraintLength(*prepDesc.desc, 0);
if (!shaderDesc.solverPrep)
return 0;
//PxU32 numAxisConstraints = 0;
Px1DConstraint rows[MAX_CONSTRAINT_ROWS];
// This is necessary so that there will be sensible defaults and shaders will
// continue to work (albeit with a recompile) if the row format changes.
// It's a bit inefficient because it fills in all constraint rows even if there
// is only going to be one generated. A way around this would be for the shader to
// specify the maximum number of rows it needs, or it could call a subroutine to
// prep the row before it starts filling it it.
PxMemZero(rows, sizeof(Px1DConstraint)*MAX_CONSTRAINT_ROWS);
for (PxU32 i = 0; i<MAX_CONSTRAINT_ROWS; i++)
{
Px1DConstraint& c = rows[i];
//Px1DConstraintInit(c);
c.minImpulse = -PX_MAX_REAL;
c.maxImpulse = PX_MAX_REAL;
}
prepDesc.invMassScales.linear0 = prepDesc.invMassScales.linear1 = prepDesc.invMassScales.angular0 = prepDesc.invMassScales.angular1 = 1.f;
PxVec3 body0WorldOffset(0.f);
PxVec3 ra, rb;
PxU32 constraintCount = (*shaderDesc.solverPrep)(rows,
body0WorldOffset,
MAX_CONSTRAINT_ROWS,
prepDesc.invMassScales,
shaderDesc.constantBlock,
prepDesc.bodyFrame0, prepDesc.bodyFrame1, prepDesc.extendedLimits, ra, rb);
prepDesc.rows = rows;
prepDesc.numRows = constraintCount;
prepDesc.body0WorldOffset = body0WorldOffset;
return ConstraintHelper::setupSolverConstraint(prepDesc, allocator, dt, invdt, Z);
}
}
}

View File

@ -0,0 +1,541 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxMemory.h"
#include "DyConstraintPrep.h"
#include "PxsRigidBody.h"
#include "DySolverConstraint1D.h"
#include "DySolverConstraint1D4.h"
#include "PsSort.h"
#include "PxcConstraintBlockStream.h"
#include "DyArticulationContactPrep.h"
#include "PsFoundation.h"
namespace physx
{
namespace Dy
{
void preprocessRows(Px1DConstraint** sorted,
Px1DConstraint* rows,
PxVec4* angSqrtInvInertia0,
PxVec4* angSqrtInvInertia1,
PxU32 rowCount,
const PxMat33& sqrtInvInertia0F32,
const PxMat33& sqrtInvInertia1F32,
const PxReal invMass0,
const PxReal invMass1,
const PxConstraintInvMassScale& ims,
bool disablePreprocessing,
bool diagonalizeDrive,
bool preprocessLinear = true);
namespace
{
void setConstants(PxReal& constant, PxReal& unbiasedConstant, PxReal& velMultiplier, PxReal& impulseMultiplier,
const Px1DConstraint& c, PxReal unitResponse, PxReal minRowResponse, PxReal erp, PxReal dt, PxReal recipdt,
const PxSolverBodyData& b0, const PxSolverBodyData& b1, const bool finished)
{
if(finished)
{
constant = 0.f;
unbiasedConstant = 0.f;
velMultiplier = 0.f;
impulseMultiplier = 0.f;
return;
}
PxReal nv = needsNormalVel(c) ? b0.projectVelocity(c.linear0, c.angular0) - b1.projectVelocity(c.linear1, c.angular1)
: 0;
setSolverConstants(constant, unbiasedConstant, velMultiplier, impulseMultiplier,
c, nv, unitResponse, minRowResponse, erp, dt, recipdt);
}
}
SolverConstraintPrepState::Enum setupSolverConstraint4
(PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
const PxReal dt, const PxReal recipdt, PxU32& totalRows,
PxConstraintAllocator& allocator, PxU32 maxRows);
SolverConstraintPrepState::Enum setupSolverConstraint4
(SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
const PxReal dt, const PxReal recipdt, PxU32& totalRows,
PxConstraintAllocator& allocator)
{
//KS - we will never get here with constraints involving articulations so we don't need to stress about those in here
totalRows = 0;
Px1DConstraint allRows[MAX_CONSTRAINT_ROWS * 4];
PxU32 numRows = 0;
PxU32 maxRows = 0;
PxU32 preppedIndex = 0;
for (PxU32 a = 0; a < 4; ++a)
{
Px1DConstraint* rows = allRows + numRows;
SolverConstraintShaderPrepDesc& shaderDesc = constraintShaderDescs[a];
PxSolverConstraintPrepDesc& desc = constraintDescs[a];
if (!shaderDesc.solverPrep)
return SolverConstraintPrepState::eUNBATCHABLE;
PxMemZero(rows + preppedIndex, sizeof(Px1DConstraint)*(MAX_CONSTRAINT_ROWS));
for (PxU32 b = preppedIndex; b < MAX_CONSTRAINT_ROWS; ++b)
{
Px1DConstraint& c = rows[b];
//Px1DConstraintInit(c);
c.minImpulse = -PX_MAX_REAL;
c.maxImpulse = PX_MAX_REAL;
}
desc.invMassScales.linear0 = desc.invMassScales.linear1 = desc.invMassScales.angular0 = desc.invMassScales.angular1 = 1.f;
desc.body0WorldOffset = PxVec3(0.f);
PxVec3 ra, rb;
PxU32 constraintCount = (*shaderDesc.solverPrep)(rows,
desc.body0WorldOffset,
MAX_CONSTRAINT_ROWS,
desc.invMassScales,
shaderDesc.constantBlock,
desc.bodyFrame0, desc.bodyFrame1, desc.extendedLimits, ra, rb);
preppedIndex = MAX_CONSTRAINT_ROWS - constraintCount;
maxRows = PxMax(constraintCount, maxRows);
if (constraintCount == 0)
return SolverConstraintPrepState::eUNBATCHABLE;
desc.rows = rows;
desc.numRows = constraintCount;
numRows += constraintCount;
}
return setupSolverConstraint4(constraintDescs, dt, recipdt, totalRows, allocator, maxRows);
}
SolverConstraintPrepState::Enum setupSolverConstraint4
(PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
const PxReal dt, const PxReal recipdt, PxU32& totalRows,
PxConstraintAllocator& allocator, PxU32 maxRows)
{
const Vec4V zero = V4Zero();
Px1DConstraint* allSorted[MAX_CONSTRAINT_ROWS * 4];
PxU32 startIndex[4];
PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS * 4];
PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS * 4];
PxU32 numRows = 0;
for (PxU32 a = 0; a < 4; ++a)
{
startIndex[a] = numRows;
PxSolverConstraintPrepDesc& desc = constraintDescs[a];
Px1DConstraint** sorted = allSorted + numRows;
preprocessRows(sorted, desc.rows, angSqrtInvInertia0 + numRows, angSqrtInvInertia1 + numRows, desc.numRows,
desc.data0->sqrtInvInertia, desc.data1->sqrtInvInertia, desc.data0->invMass, desc.data1->invMass,
desc.invMassScales, desc.disablePreprocessing, desc.improvedSlerp);
numRows += desc.numRows;
}
PxU32 stride = sizeof(SolverConstraint1DDynamic4);
const PxU32 constraintLength = sizeof(SolverConstraint1DHeader4) + stride * maxRows;
//KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we
//know SPU DMAs have completed)
PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u);
if(NULL == ptr || (reinterpret_cast<PxU8*>(-1))==ptr)
{
for(PxU32 a = 0; a < 4; ++a)
{
PxSolverConstraintPrepDesc& desc = constraintDescs[a];
desc.desc->constraint = NULL;
setConstraintLength(*desc.desc, 0);
desc.desc->writeBack = desc.writeback;
}
if(NULL==ptr)
{
PX_WARN_ONCE(
"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
"Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks.");
return SolverConstraintPrepState::eOUT_OF_MEMORY;
}
else
{
PX_WARN_ONCE(
"Attempting to allocate more than 16K of constraint data. "
"Either accept joints detaching/exploding or simplify constraints.");
ptr=NULL;
return SolverConstraintPrepState::eOUT_OF_MEMORY;
}
}
//desc.constraint = ptr;
totalRows = numRows;
for(PxU32 a = 0; a < 4; ++a)
{
PxSolverConstraintPrepDesc& desc = constraintDescs[a];
desc.desc->constraint = ptr;
setConstraintLength(*desc.desc, constraintLength);
desc.desc->writeBack = desc.writeback;
}
const PxReal erp[4] = { 1.0f, 1.0f, 1.0f, 1.0f};
//OK, now we build all 4 constraints into a single set of rows
{
PxU8* currPtr = ptr;
SolverConstraint1DHeader4* header = reinterpret_cast<SolverConstraint1DHeader4*>(currPtr);
currPtr += sizeof(SolverConstraint1DHeader4);
const PxSolverBodyData& bd00 = *constraintDescs[0].data0;
const PxSolverBodyData& bd01 = *constraintDescs[1].data0;
const PxSolverBodyData& bd02 = *constraintDescs[2].data0;
const PxSolverBodyData& bd03 = *constraintDescs[3].data0;
const PxSolverBodyData& bd10 = *constraintDescs[0].data1;
const PxSolverBodyData& bd11 = *constraintDescs[1].data1;
const PxSolverBodyData& bd12 = *constraintDescs[2].data1;
const PxSolverBodyData& bd13 = *constraintDescs[3].data1;
//Load up masses, invInertia, velocity etc.
const Vec4V invMassScale0 = V4LoadXYZW(constraintDescs[0].invMassScales.linear0, constraintDescs[1].invMassScales.linear0,
constraintDescs[2].invMassScales.linear0, constraintDescs[3].invMassScales.linear0);
const Vec4V invMassScale1 = V4LoadXYZW(constraintDescs[0].invMassScales.linear1, constraintDescs[1].invMassScales.linear1,
constraintDescs[2].invMassScales.linear1, constraintDescs[3].invMassScales.linear1);
const Vec4V iMass0 = V4LoadXYZW(bd00.invMass, bd01.invMass, bd02.invMass, bd03.invMass);
const Vec4V iMass1 = V4LoadXYZW(bd10.invMass, bd11.invMass, bd12.invMass, bd13.invMass);
const Vec4V invMass0 = V4Mul(iMass0, invMassScale0);
const Vec4V invMass1 = V4Mul(iMass1, invMassScale1);
const Vec4V invInertiaScale0 = V4LoadXYZW(constraintDescs[0].invMassScales.angular0, constraintDescs[1].invMassScales.angular0,
constraintDescs[2].invMassScales.angular0, constraintDescs[3].invMassScales.angular0);
const Vec4V invInertiaScale1 = V4LoadXYZW(constraintDescs[0].invMassScales.angular1, constraintDescs[1].invMassScales.angular1,
constraintDescs[2].invMassScales.angular1, constraintDescs[3].invMassScales.angular1);
//Velocities
Vec4V linVel00 = V4LoadA(&bd00.linearVelocity.x);
Vec4V linVel01 = V4LoadA(&bd10.linearVelocity.x);
Vec4V angVel00 = V4LoadA(&bd00.angularVelocity.x);
Vec4V angVel01 = V4LoadA(&bd10.angularVelocity.x);
Vec4V linVel10 = V4LoadA(&bd01.linearVelocity.x);
Vec4V linVel11 = V4LoadA(&bd11.linearVelocity.x);
Vec4V angVel10 = V4LoadA(&bd01.angularVelocity.x);
Vec4V angVel11 = V4LoadA(&bd11.angularVelocity.x);
Vec4V linVel20 = V4LoadA(&bd02.linearVelocity.x);
Vec4V linVel21 = V4LoadA(&bd12.linearVelocity.x);
Vec4V angVel20 = V4LoadA(&bd02.angularVelocity.x);
Vec4V angVel21 = V4LoadA(&bd12.angularVelocity.x);
Vec4V linVel30 = V4LoadA(&bd03.linearVelocity.x);
Vec4V linVel31 = V4LoadA(&bd13.linearVelocity.x);
Vec4V angVel30 = V4LoadA(&bd03.angularVelocity.x);
Vec4V angVel31 = V4LoadA(&bd13.angularVelocity.x);
Vec4V linVel0T0, linVel0T1, linVel0T2;
Vec4V linVel1T0, linVel1T1, linVel1T2;
Vec4V angVel0T0, angVel0T1, angVel0T2;
Vec4V angVel1T0, angVel1T1, angVel1T2;
PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2);
PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2);
PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVel0T0, angVel0T1, angVel0T2);
PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVel1T0, angVel1T1, angVel1T2);
//body world offsets
Vec4V workOffset0 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[0].body0WorldOffset));
Vec4V workOffset1 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[1].body0WorldOffset));
Vec4V workOffset2 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[2].body0WorldOffset));
Vec4V workOffset3 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[3].body0WorldOffset));
Vec4V workOffsetX, workOffsetY, workOffsetZ;
PX_TRANSPOSE_44_34(workOffset0, workOffset1, workOffset2, workOffset3, workOffsetX, workOffsetY, workOffsetZ);
const FloatV dtV = FLoad(dt);
Vec4V linBreakForce = V4LoadXYZW(constraintDescs[0].linBreakForce, constraintDescs[1].linBreakForce,
constraintDescs[2].linBreakForce, constraintDescs[3].linBreakForce);
Vec4V angBreakForce = V4LoadXYZW(constraintDescs[0].angBreakForce, constraintDescs[1].angBreakForce,
constraintDescs[2].angBreakForce, constraintDescs[3].angBreakForce);
header->break0 = PxU8((constraintDescs[0].linBreakForce != PX_MAX_F32) || (constraintDescs[0].angBreakForce != PX_MAX_F32));
header->break1 = PxU8((constraintDescs[1].linBreakForce != PX_MAX_F32) || (constraintDescs[1].angBreakForce != PX_MAX_F32));
header->break2 = PxU8((constraintDescs[2].linBreakForce != PX_MAX_F32) || (constraintDescs[2].angBreakForce != PX_MAX_F32));
header->break3 = PxU8((constraintDescs[3].linBreakForce != PX_MAX_F32) || (constraintDescs[3].angBreakForce != PX_MAX_F32));
//OK, I think that's everything loaded in
header->invMass0D0 = invMass0;
header->invMass1D1 = invMass1;
header->angD0 = invInertiaScale0;
header->angD1 = invInertiaScale1;
header->body0WorkOffsetX = workOffsetX;
header->body0WorkOffsetY = workOffsetY;
header->body0WorkOffsetZ = workOffsetZ;
header->count = maxRows;
header->type = DY_SC_TYPE_BLOCK_1D;
header->linBreakImpulse = V4Scale(linBreakForce, dtV);
header->angBreakImpulse = V4Scale(angBreakForce, dtV);
header->count0 = Ps::to8(constraintDescs[0].numRows);
header->count1 = Ps::to8(constraintDescs[1].numRows);
header->count2 = Ps::to8(constraintDescs[2].numRows);
header->count3 = Ps::to8(constraintDescs[3].numRows);
//Now we loop over the constraints and build the results...
PxU32 index0 = 0;
PxU32 endIndex0 = constraintDescs[0].numRows - 1;
PxU32 index1 = startIndex[1];
PxU32 endIndex1 = index1 + constraintDescs[1].numRows - 1;
PxU32 index2 = startIndex[2];
PxU32 endIndex2 = index2 + constraintDescs[2].numRows - 1;
PxU32 index3 = startIndex[3];
PxU32 endIndex3 = index3 + constraintDescs[3].numRows - 1;
const FloatV one = FOne();
for(PxU32 a = 0; a < maxRows; ++a)
{
SolverConstraint1DDynamic4* c = reinterpret_cast<SolverConstraint1DDynamic4*>(currPtr);
currPtr += stride;
Px1DConstraint* con0 = allSorted[index0];
Px1DConstraint* con1 = allSorted[index1];
Px1DConstraint* con2 = allSorted[index2];
Px1DConstraint* con3 = allSorted[index3];
Vec4V cangDelta00 = V4LoadA(&angSqrtInvInertia0[index0].x);
Vec4V cangDelta01 = V4LoadA(&angSqrtInvInertia0[index1].x);
Vec4V cangDelta02 = V4LoadA(&angSqrtInvInertia0[index2].x);
Vec4V cangDelta03 = V4LoadA(&angSqrtInvInertia0[index3].x);
Vec4V cangDelta10 = V4LoadA(&angSqrtInvInertia1[index0].x);
Vec4V cangDelta11 = V4LoadA(&angSqrtInvInertia1[index1].x);
Vec4V cangDelta12 = V4LoadA(&angSqrtInvInertia1[index2].x);
Vec4V cangDelta13 = V4LoadA(&angSqrtInvInertia1[index3].x);
index0 = index0 == endIndex0 ? index0 : index0 + 1;
index1 = index1 == endIndex1 ? index1 : index1 + 1;
index2 = index2 == endIndex2 ? index2 : index2 + 1;
index3 = index3 == endIndex3 ? index3 : index3 + 1;
Vec4V driveScale = V4Splat(one);
if (con0->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[0].driveLimitsAreForces)
driveScale = V4SetX(driveScale, FMin(one, dtV));
if (con1->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[1].driveLimitsAreForces)
driveScale = V4SetY(driveScale, FMin(one, dtV));
if (con2->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[2].driveLimitsAreForces)
driveScale = V4SetZ(driveScale, FMin(one, dtV));
if (con3->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[3].driveLimitsAreForces)
driveScale = V4SetW(driveScale, FMin(one, dtV));
Vec4V clin00 = V4LoadA(&con0->linear0.x);
Vec4V clin01 = V4LoadA(&con1->linear0.x);
Vec4V clin02 = V4LoadA(&con2->linear0.x);
Vec4V clin03 = V4LoadA(&con3->linear0.x);
Vec4V cang00 = V4LoadA(&con0->angular0.x);
Vec4V cang01 = V4LoadA(&con1->angular0.x);
Vec4V cang02 = V4LoadA(&con2->angular0.x);
Vec4V cang03 = V4LoadA(&con3->angular0.x);
Vec4V clin0X, clin0Y, clin0Z;
Vec4V cang0X, cang0Y, cang0Z;
PX_TRANSPOSE_44_34(clin00, clin01, clin02, clin03, clin0X, clin0Y, clin0Z);
PX_TRANSPOSE_44_34(cang00, cang01, cang02, cang03, cang0X, cang0Y, cang0Z);
const Vec4V maxImpulse = V4LoadXYZW(con0->maxImpulse, con1->maxImpulse, con2->maxImpulse, con3->maxImpulse);
const Vec4V minImpulse = V4LoadXYZW(con0->minImpulse, con1->minImpulse, con2->minImpulse, con3->minImpulse);
Vec4V angDelta0X, angDelta0Y, angDelta0Z;
PX_TRANSPOSE_44_34(cangDelta00, cangDelta01, cangDelta02, cangDelta03, angDelta0X, angDelta0Y, angDelta0Z);
c->flags[0] = 0;
c->flags[1] = 0;
c->flags[2] = 0;
c->flags[3] = 0;
c->lin0X = clin0X;
c->lin0Y = clin0Y;
c->lin0Z = clin0Z;
c->ang0X = angDelta0X;
c->ang0Y = angDelta0Y;
c->ang0Z = angDelta0Z;
c->ang0WritebackX = cang0X;
c->ang0WritebackY = cang0Y;
c->ang0WritebackZ = cang0Z;
c->minImpulse = V4Mul(minImpulse, driveScale);
c->maxImpulse = V4Mul(maxImpulse, driveScale);
c->appliedForce = zero;
const Vec4V lin0MagSq = V4MulAdd(clin0Z, clin0Z, V4MulAdd(clin0Y, clin0Y, V4Mul(clin0X, clin0X)));
const Vec4V cang0DotAngDelta = V4MulAdd(angDelta0Z, angDelta0Z, V4MulAdd(angDelta0Y, angDelta0Y, V4Mul(angDelta0X, angDelta0X)));
c->flags[0] = 0;
c->flags[1] = 0;
c->flags[2] = 0;
c->flags[3] = 0;
Vec4V unitResponse = V4MulAdd(lin0MagSq, invMass0, V4Mul(cang0DotAngDelta, invInertiaScale0));
Vec4V clin10 = V4LoadA(&con0->linear1.x);
Vec4V clin11 = V4LoadA(&con1->linear1.x);
Vec4V clin12 = V4LoadA(&con2->linear1.x);
Vec4V clin13 = V4LoadA(&con3->linear1.x);
Vec4V cang10 = V4LoadA(&con0->angular1.x);
Vec4V cang11 = V4LoadA(&con1->angular1.x);
Vec4V cang12 = V4LoadA(&con2->angular1.x);
Vec4V cang13 = V4LoadA(&con3->angular1.x);
Vec4V clin1X, clin1Y, clin1Z;
Vec4V cang1X, cang1Y, cang1Z;
PX_TRANSPOSE_44_34(clin10, clin11, clin12, clin13, clin1X, clin1Y, clin1Z);
PX_TRANSPOSE_44_34(cang10, cang11, cang12, cang13, cang1X, cang1Y, cang1Z);
Vec4V angDelta1X, angDelta1Y, angDelta1Z;
PX_TRANSPOSE_44_34(cangDelta10, cangDelta11, cangDelta12, cangDelta13, angDelta1X, angDelta1Y, angDelta1Z);
const Vec4V lin1MagSq = V4MulAdd(clin1Z, clin1Z, V4MulAdd(clin1Y, clin1Y, V4Mul(clin1X, clin1X)));
const Vec4V cang1DotAngDelta = V4MulAdd(angDelta1Z, angDelta1Z, V4MulAdd(angDelta1Y, angDelta1Y, V4Mul(angDelta1X, angDelta1X)));
c->lin1X = clin1X;
c->lin1Y = clin1Y;
c->lin1Z = clin1Z;
c->ang1X = angDelta1X;
c->ang1Y = angDelta1Y;
c->ang1Z = angDelta1Z;
unitResponse = V4Add(unitResponse, V4MulAdd(lin1MagSq, invMass1, V4Mul(cang1DotAngDelta, invInertiaScale1)));
Vec4V linProj0(V4Mul(clin0X, linVel0T0));
Vec4V linProj1(V4Mul(clin1X, linVel1T0));
Vec4V angProj0(V4Mul(cang0X, angVel0T0));
Vec4V angProj1(V4Mul(cang1X, angVel1T0));
linProj0 = V4MulAdd(clin0Y, linVel0T1, linProj0);
linProj1 = V4MulAdd(clin1Y, linVel1T1, linProj1);
angProj0 = V4MulAdd(cang0Y, angVel0T1, angProj0);
angProj1 = V4MulAdd(cang1Y, angVel1T1, angProj1);
linProj0 = V4MulAdd(clin0Z, linVel0T2, linProj0);
linProj1 = V4MulAdd(clin1Z, linVel1T2, linProj1);
angProj0 = V4MulAdd(cang0Z, angVel0T2, angProj0);
angProj1 = V4MulAdd(cang1Z, angVel1T2, angProj1);
const Vec4V projectVel0 = V4Add(linProj0, angProj0);
const Vec4V projectVel1 = V4Add(linProj1, angProj1);
const Vec4V normalVel = V4Sub(projectVel0, projectVel1);
{
const PxVec4& ur = reinterpret_cast<const PxVec4&>(unitResponse);
PxVec4& cConstant = reinterpret_cast<PxVec4&>(c->constant);
PxVec4& cUnbiasedConstant = reinterpret_cast<PxVec4&>(c->unbiasedConstant);
PxVec4& cVelMultiplier = reinterpret_cast<PxVec4&>(c->velMultiplier);
PxVec4& cImpulseMultiplier = reinterpret_cast<PxVec4&>(c->impulseMultiplier);
setConstants(cConstant.x, cUnbiasedConstant.x, cVelMultiplier.x, cImpulseMultiplier.x,
*con0, ur.x, constraintDescs[0].minResponseThreshold, erp[0], dt, recipdt,
*constraintDescs[0].data0, *constraintDescs[0].data1, a >= constraintDescs[0].numRows);
setConstants(cConstant.y, cUnbiasedConstant.y, cVelMultiplier.y, cImpulseMultiplier.y,
*con1, ur.y, constraintDescs[1].minResponseThreshold, erp[1], dt, recipdt,
*constraintDescs[1].data0, *constraintDescs[1].data1, a >= constraintDescs[1].numRows);
setConstants(cConstant.z, cUnbiasedConstant.z, cVelMultiplier.z, cImpulseMultiplier.z,
*con2, ur.z, constraintDescs[2].minResponseThreshold, erp[2], dt, recipdt,
*constraintDescs[2].data0, *constraintDescs[2].data1, a >= constraintDescs[2].numRows);
setConstants(cConstant.w, cUnbiasedConstant.w, cVelMultiplier.w, cImpulseMultiplier.w,
*con3, ur.w, constraintDescs[3].minResponseThreshold, erp[3], dt, recipdt,
*constraintDescs[3].data0, *constraintDescs[3].data1, a >= constraintDescs[3].numRows);
}
const Vec4V velBias = V4Mul(c->velMultiplier, normalVel);
c->constant = V4Add(c->constant, velBias);
c->unbiasedConstant = V4Add(c->unbiasedConstant, velBias);
if(con0->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
c->flags[0] |= DY_SC_FLAG_OUTPUT_FORCE;
if(con1->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
c->flags[1] |= DY_SC_FLAG_OUTPUT_FORCE;
if(con2->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
c->flags[2] |= DY_SC_FLAG_OUTPUT_FORCE;
if(con3->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
c->flags[3] |= DY_SC_FLAG_OUTPUT_FORCE;
}
*(reinterpret_cast<PxU32*>(currPtr)) = 0;
*(reinterpret_cast<PxU32*>(currPtr + 4)) = 0;
}
//OK, we're ready to allocate and solve prep these constraints now :-)
return SolverConstraintPrepState::eSUCCESS;
}
}
}

View File

@ -0,0 +1,824 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "PxSceneDesc.h"
#include "PsVecMath.h"
#include "PsMathUtils.h"
#include "DySolverContact.h"
#include "DySolverContact4.h"
#include "DySolverConstraintTypes.h"
#include "PxcNpWorkUnit.h"
#include "DyThreadContext.h"
#include "DyContactPrep.h"
#include "PxcNpContactPrepShared.h"
#include "PxvDynamics.h"
#include "DyCorrelationBuffer.h"
#include "DyDynamics.h"
#include "DyArticulationContactPrep.h"
#include "PxsContactManager.h"
#include "PsFoundation.h"
#include "DyConstraintPrep.h"
using namespace physx;
using namespace Gu;
#include "PsVecMath.h"
#include "PxContactModifyCallback.h"
#include "PxsMaterialManager.h"
#include "PxsMaterialCombiner.h"
#include "DyContactPrepShared.h"
using namespace Ps::aos;
namespace physx
{
namespace Dy
{
PxcCreateFinalizeSolverContactMethod createFinalizeMethods[3] =
{
createFinalizeSolverContacts,
createFinalizeSolverContactsCoulomb1D,
createFinalizeSolverContactsCoulomb2D
};
static void setupFinalizeSolverConstraints(Sc::ShapeInteraction* shapeInteraction,
const ContactPoint* buffer,
const CorrelationBuffer& c,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxU8* workspace,
const PxSolverBodyData& data0,
const PxSolverBodyData& data1,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal invMassScale0, PxReal invInertiaScale0,
PxReal invMassScale1, PxReal invInertiaScale1,
bool hasForceThreshold, bool staticOrKinematicBody,
const PxReal restDist, PxU8* frictionDataPtr,
const PxReal maxCCDSeparation,
const PxReal solverOffsetSlopF32)
{
// NOTE II: the friction patches are sparse (some of them have no contact patches, and
// therefore did not get written back to the cache) but the patch addresses are dense,
// corresponding to valid patches
const Vec3V solverOffsetSlop = V3Load(solverOffsetSlopF32);
const FloatV ccdMaxSeparation = FLoad(maxCCDSeparation);
PxU8 flags = PxU8(hasForceThreshold ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0);
PxU8* PX_RESTRICT ptr = workspace;
PxU8 type = Ps::to8(staticOrKinematicBody ? DY_SC_TYPE_STATIC_CONTACT
: DY_SC_TYPE_RB_CONTACT);
const FloatV zero=FZero();
const Vec3V v3Zero = V3Zero();
const FloatV d0 = FLoad(invMassScale0);
const FloatV d1 = FLoad(invMassScale1);
const FloatV angD0 = FLoad(invInertiaScale0);
const FloatV angD1 = FLoad(invInertiaScale1);
const FloatV nDom1fV = FNeg(d1);
const FloatV invMass0 = FLoad(data0.invMass);
const FloatV invMass1 = FLoad(data1.invMass);
const FloatV invMass0_dom0fV = FMul(d0, invMass0);
const FloatV invMass1_dom1fV = FMul(nDom1fV, invMass1);
Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero();
staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, invMass0_dom0fV);
staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, invMass1_dom1fV);
const FloatV restDistance = FLoad(restDist);
const FloatV maxPenBias = FMax(FLoad(data0.penBiasClamp), FLoad(data1.penBiasClamp));
const QuatV bodyFrame0q = QuatVLoadU(&bodyFrame0.q.x);
const Vec3V bodyFrame0p = V3LoadU(bodyFrame0.p);
const QuatV bodyFrame1q = QuatVLoadU(&bodyFrame1.q.x);
const Vec3V bodyFrame1p = V3LoadU(bodyFrame1.p);
PxU32 frictionPatchWritebackAddrIndex = 0;
PxU32 contactWritebackCount = 0;
Ps::prefetchLine(c.contactID);
Ps::prefetchLine(c.contactID, 128);
const Vec3V linVel0 = V3LoadU_SafeReadW(data0.linearVelocity); // PT: safe because 'invMass' follows 'initialLinVel' in PxSolverBodyData
const Vec3V linVel1 = V3LoadU_SafeReadW(data1.linearVelocity); // PT: safe because 'invMass' follows 'initialLinVel' in PxSolverBodyData
const Vec3V angVel0 = V3LoadU_SafeReadW(data0.angularVelocity); // PT: safe because 'reportThreshold' follows 'initialAngVel' in PxSolverBodyData
const Vec3V angVel1 = V3LoadU_SafeReadW(data1.angularVelocity); // PT: safe because 'reportThreshold' follows 'initialAngVel' in PxSolverBodyData
PX_ALIGN(16, const Mat33V invSqrtInertia0)
(
V3LoadU_SafeReadW(data0.sqrtInvInertia.column0), // PT: safe because 'column1' follows 'column0' in PxMat33
V3LoadU_SafeReadW(data0.sqrtInvInertia.column1), // PT: safe because 'column2' follows 'column1' in PxMat33
V3LoadU(data0.sqrtInvInertia.column2)
);
PX_ALIGN(16, const Mat33V invSqrtInertia1)
(
V3LoadU_SafeReadW(data1.sqrtInvInertia.column0), // PT: safe because 'column1' follows 'column0' in PxMat33
V3LoadU_SafeReadW(data1.sqrtInvInertia.column1), // PT: safe because 'column2' follows 'column1' in PxMat33
V3LoadU(data1.sqrtInvInertia.column2)
);
const FloatV invDt = FLoad(invDtF32);
const FloatV p8 = FLoad(0.8f);
const FloatV bounceThreshold = FLoad(bounceThresholdF32);
const FloatV invDtp8 = FMul(invDt, p8);
for(PxU32 i=0;i<c.frictionPatchCount;i++)
{
PxU32 contactCount = c.frictionPatchContactCounts[i];
if(contactCount == 0)
continue;
const FrictionPatch& frictionPatch = c.frictionPatches[i];
PX_ASSERT(frictionPatch.anchorCount <= 2);
PxU32 firstPatch = c.correlationListHeads[i];
const Gu::ContactPoint* contactBase0 = buffer + c.contactPatches[firstPatch].start;
const PxReal combinedRestitution = contactBase0->restitution;
SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr);
ptr += sizeof(SolverContactHeader);
Ps::prefetchLine(ptr, 128);
Ps::prefetchLine(ptr, 256);
header->shapeInteraction = shapeInteraction;
header->flags = flags;
FStore(invMass0_dom0fV, &header->invMass0);
FStore(FNeg(invMass1_dom1fV), &header->invMass1);
const FloatV restitution = FLoad(combinedRestitution);
PxU32 pointStride = sizeof(SolverContactPoint);
PxU32 frictionStride = sizeof(SolverContactFriction);
const Vec3V normal = V3LoadA(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
const FloatV normalLenSq = V3LengthSq(normal);
const VecCrossV norCross = V3PrepareCross(normal);
const FloatV norVel = V3SumElems(V3NegMulSub(normal, linVel1, V3Mul(normal, linVel0)));
const FloatV invMassNorLenSq0 = FMul(invMass0_dom0fV, normalLenSq);
const FloatV invMassNorLenSq1 = FMul(invMass1_dom1fV, normalLenSq);
header->normal_minAppliedImpulseForFrictionW = Vec4V_From_Vec3V(normal);
for(PxU32 patch=c.correlationListHeads[i];
patch!=CorrelationBuffer::LIST_END;
patch = c.contactPatches[patch].next)
{
const PxU32 count = c.contactPatches[patch].count;
const Gu::ContactPoint* contactBase = buffer + c.contactPatches[patch].start;
PxU8* p = ptr;
for(PxU32 j=0;j<count;j++)
{
Ps::prefetchLine(p, 256);
const Gu::ContactPoint& contact = contactBase[j];
SolverContactPoint* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPoint*>(p);
p += pointStride;
constructContactConstraint(invSqrtInertia0, invSqrtInertia1, invMassNorLenSq0,
invMassNorLenSq1, angD0, angD1, bodyFrame0p, bodyFrame1p,
normal, norVel, norCross, angVel0, angVel1,
invDt, invDtp8, restDistance, maxPenBias, restitution,
bounceThreshold, contact, *solverContact,
ccdMaxSeparation, solverOffsetSlop);
}
ptr = p;
}
contactWritebackCount += contactCount;
PxF32* forceBuffers = reinterpret_cast<PxF32*>(ptr);
PxMemZero(forceBuffers, sizeof(PxF32) * contactCount);
ptr += ((contactCount + 3) & (~3)) * sizeof(PxF32); // jump to next 16-byte boundary
const PxReal frictionCoefficient = (contactBase0->materialFlags & PxMaterialFlag::eIMPROVED_PATCH_FRICTION && frictionPatch.anchorCount == 2) ? 0.5f : 1.f;
const PxReal staticFriction = contactBase0->staticFriction * frictionCoefficient;
const PxReal dynamicFriction = contactBase0->dynamicFriction* frictionCoefficient;
const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction));
staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction));
const bool haveFriction = (disableStrongFriction == 0 && frictionPatch.anchorCount != 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;
header->numNormalConstr = Ps::to8(contactCount);
header->numFrictionConstr = Ps::to8(haveFriction ? frictionPatch.anchorCount*2 : 0);
header->type = type;
header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;
FStore(angD0, &header->angDom0);
FStore(angD1, &header->angDom1);
header->broken = 0;
if(haveFriction)
{
const Vec3V linVrel = V3Sub(linVel0, linVel1);
//const Vec3V normal = Vec3V_From_PxVec3_Aligned(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
const FloatV orthoThreshold = FLoad(0.70710678f);
const FloatV p1 = FLoad(0.0001f);
// fallback: normal.cross((1,0,0)) or normal.cross((0,0,1))
const FloatV normalX = V3GetX(normal);
const FloatV normalY = V3GetY(normal);
const FloatV normalZ = V3GetZ(normal);
Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY);
Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero) ;
Vec3V t0Fallback = V3Sel(FIsGrtr(orthoThreshold, FAbs(normalX)), t0Fallback1, t0Fallback2);
Vec3V t0 = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel)));
t0 = V3Sel(FIsGrtr(V3LengthSq(t0), p1), t0, t0Fallback);
t0 = V3Normalize(t0);
const VecCrossV t0Cross = V3PrepareCross(t0);
const Vec3V t1 = V3Cross(norCross, t0Cross);
const VecCrossV t1Cross = V3PrepareCross(t1);
// since we don't even have the body velocities we can't compute the tangent dirs, so
// the only thing we can do right now is to write the geometric information (which is the
// same for both axis constraints of an anchor) We put ra in the raXn field, rb in the rbXn
// field, and the error in the normal field. See corresponding comments in
// completeContactFriction()
//We want to set the writeBack ptr to point to the broken flag of the friction patch.
//On spu we have a slight problem here because the friction patch array is
//in local store rather than in main memory. The good news is that the address of the friction
//patch array in main memory is stored in the work unit. These two addresses will be equal
//except on spu where one is local store memory and the other is the effective address in main memory.
//Using the value stored in the work unit guarantees that the main memory address is used on all platforms.
PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch);
header->frictionBrokenWritebackByte = writeback;
for(PxU32 j = 0; j < frictionPatch.anchorCount; j++)
{
Ps::prefetchLine(ptr, 256);
Ps::prefetchLine(ptr, 384);
SolverContactFriction* PX_RESTRICT f0 = reinterpret_cast<SolverContactFriction*>(ptr);
ptr += frictionStride;
SolverContactFriction* PX_RESTRICT f1 = reinterpret_cast<SolverContactFriction*>(ptr);
ptr += frictionStride;
Vec3V body0Anchor = V3LoadU(frictionPatch.body0Anchors[j]);
Vec3V body1Anchor = V3LoadU(frictionPatch.body1Anchors[j]);
const Vec3V ra = QuatRotate(bodyFrame0q, body0Anchor);
const Vec3V rb = QuatRotate(bodyFrame1q, body1Anchor);
/*ra = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(ra)), v3Zero, ra);
rb = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rb)), v3Zero, rb);*/
PxU32 index = c.contactID[i][j];
Vec3V error = V3Sub(V3Add(ra, bodyFrame0p), V3Add(rb, bodyFrame1p));
error = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(error)), v3Zero, error);
index = index == 0xFFFF ? c.contactPatches[c.correlationListHeads[i]].start : index;
const Vec3V tvel = V3LoadA(buffer[index].targetVel);
{
Vec3V raXn = V3Cross(ra, t0Cross);
Vec3V rbXn = V3Cross(rb, t0Cross);
raXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(raXn)), V3Zero(), raXn);
rbXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rbXn)), V3Zero(), rbXn);
const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);
const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(raXnSqrtInertia, raXnSqrtInertia)));
const FloatV resp1 = FSub(FMul(angD1, V3Dot(rbXnSqrtInertia, rbXnSqrtInertia)), invMass1_dom1fV);
const FloatV resp = FAdd(resp0, resp1);
const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FDiv(p8, resp), zero);
FloatV targetVel = V3Dot(tvel, t0);
const FloatV vrel1 = FAdd(V3Dot(t0, linVel0), V3Dot(raXn, angVel0));
const FloatV vrel2 = FAdd(V3Dot(t0, linVel1), V3Dot(rbXn, angVel1));
const FloatV vrel = FSub(vrel1, vrel2);
targetVel = FSub(targetVel, vrel);
f0->normalXYZ_appliedForceW = V4SetW(t0, zero);
f0->raXnXYZ_velMultiplierW = V4SetW(raXnSqrtInertia, velMultiplier);
f0->rbXnXYZ_biasW = V4SetW(rbXnSqrtInertia, FMul(V3Dot(t0, error), invDt));
FStore(targetVel, &f0->targetVel);
}
{
Vec3V raXn = V3Cross(ra, t1Cross);
Vec3V rbXn = V3Cross(rb, t1Cross);
raXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(raXn)), V3Zero(), raXn);
rbXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rbXn)), V3Zero(), rbXn);
const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);
const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(raXnSqrtInertia, raXnSqrtInertia)));
const FloatV resp1 = FSub(FMul(angD1, V3Dot(rbXnSqrtInertia, rbXnSqrtInertia)), invMass1_dom1fV);
const FloatV resp = FAdd(resp0, resp1);
const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FDiv(p8, resp), zero);
FloatV targetVel = V3Dot(tvel, t1);
const FloatV vrel1 = FAdd(V3Dot(t1, linVel0), V3Dot(raXn, angVel0));
const FloatV vrel2 = FAdd(V3Dot(t1, linVel1), V3Dot(rbXn, angVel1));
const FloatV vrel = FSub(vrel1, vrel2);
targetVel = FSub(targetVel, vrel);
f1->normalXYZ_appliedForceW = V4SetW(t1, zero);
f1->raXnXYZ_velMultiplierW = V4SetW(raXnSqrtInertia, velMultiplier);
f1->rbXnXYZ_biasW = V4SetW(rbXnSqrtInertia, FMul(V3Dot(t1, error), invDt));
FStore(targetVel, &f1->targetVel);
}
}
}
frictionPatchWritebackAddrIndex++;
}
}
PX_FORCE_INLINE void computeBlockStreamByteSizes(const bool useExtContacts, const CorrelationBuffer& c,
PxU32& _solverConstraintByteSize, PxU32& _frictionPatchByteSize, PxU32& _numFrictionPatches,
PxU32& _axisConstraintCount)
{
PX_ASSERT(0 == _solverConstraintByteSize);
PX_ASSERT(0 == _frictionPatchByteSize);
PX_ASSERT(0 == _numFrictionPatches);
PX_ASSERT(0 == _axisConstraintCount);
// PT: use local vars to remove LHS
PxU32 solverConstraintByteSize = 0;
PxU32 numFrictionPatches = 0;
PxU32 axisConstraintCount = 0;
for(PxU32 i = 0; i < c.frictionPatchCount; i++)
{
//Friction patches.
if(c.correlationListHeads[i] != CorrelationBuffer::LIST_END)
numFrictionPatches++;
const FrictionPatch& frictionPatch = c.frictionPatches[i];
const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0;
//Solver constraint data.
if(c.frictionPatchContactCounts[i]!=0)
{
solverConstraintByteSize += sizeof(SolverContactHeader);
solverConstraintByteSize += useExtContacts ? c.frictionPatchContactCounts[i] * sizeof(SolverContactPointExt)
: c.frictionPatchContactCounts[i] * sizeof(SolverContactPoint);
solverConstraintByteSize += sizeof(PxF32) * ((c.frictionPatchContactCounts[i] + 3)&(~3)); //Add on space for applied impulses
axisConstraintCount += c.frictionPatchContactCounts[i];
if(haveFriction)
{
solverConstraintByteSize += useExtContacts ? c.frictionPatches[i].anchorCount * 2 * sizeof(SolverContactFrictionExt)
: c.frictionPatches[i].anchorCount * 2 * sizeof(SolverContactFriction);
axisConstraintCount += c.frictionPatches[i].anchorCount * 2;
}
}
}
PxU32 frictionPatchByteSize = numFrictionPatches*sizeof(FrictionPatch);
_numFrictionPatches = numFrictionPatches;
_axisConstraintCount = axisConstraintCount;
//16-byte alignment.
_frictionPatchByteSize = ((frictionPatchByteSize + 0x0f) & ~0x0f);
_solverConstraintByteSize = ((solverConstraintByteSize + 0x0f) & ~0x0f);
PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
PX_ASSERT(0 == (_frictionPatchByteSize & 0x0f));
}
static bool reserveBlockStreams(const bool useExtContacts, Dy::CorrelationBuffer& cBuffer,
PxU8*& solverConstraint,
FrictionPatch*& _frictionPatches,
PxU32& numFrictionPatches, PxU32& solverConstraintByteSize,
PxU32& axisConstraintCount, PxConstraintAllocator& constraintAllocator)
{
PX_ASSERT(NULL == solverConstraint);
PX_ASSERT(NULL == _frictionPatches);
PX_ASSERT(0 == numFrictionPatches);
PX_ASSERT(0 == solverConstraintByteSize);
PX_ASSERT(0 == axisConstraintCount);
//From frictionPatchStream we just need to reserve a single buffer.
PxU32 frictionPatchByteSize = 0;
//Compute the sizes of all the buffers.
computeBlockStreamByteSizes(
useExtContacts, cBuffer,
solverConstraintByteSize, frictionPatchByteSize, numFrictionPatches,
axisConstraintCount);
//Reserve the buffers.
//First reserve the accumulated buffer size for the constraint block.
PxU8* constraintBlock = NULL;
const PxU32 constraintBlockByteSize = solverConstraintByteSize;
if(constraintBlockByteSize > 0)
{
constraintBlock = constraintAllocator.reserveConstraintData(constraintBlockByteSize + 16u);
if(0==constraintBlock || (reinterpret_cast<PxU8*>(-1))==constraintBlock)
{
if(0==constraintBlock)
{
PX_WARN_ONCE(
"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
"Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
}
else
{
PX_WARN_ONCE(
"Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. "
"Either accept dropped contacts or simplify collision geometry.");
constraintBlock=NULL;
}
}
PX_ASSERT((size_t(constraintBlock) & 0xF) == 0);
}
FrictionPatch* frictionPatches = NULL;
//If the constraint block reservation didn't fail then reserve the friction buffer too.
if(frictionPatchByteSize >0 && (0==constraintBlockByteSize || constraintBlock))
{
frictionPatches = reinterpret_cast<FrictionPatch*>(constraintAllocator.reserveFrictionData(frictionPatchByteSize));
if(0==frictionPatches || (reinterpret_cast<FrictionPatch*>(-1))==frictionPatches)
{
if(0==frictionPatches)
{
PX_WARN_ONCE(
"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
"Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
}
else
{
PX_WARN_ONCE(
"Attempting to allocate more than 16K of friction data for a single contact pair in constraint prep. "
"Either accept dropped contacts or simplify collision geometry.");
frictionPatches=NULL;
}
}
}
_frictionPatches = frictionPatches;
//Patch up the individual ptrs to the buffer returned by the constraint block reservation (assuming the reservation didn't fail).
if(0==constraintBlockByteSize || constraintBlock)
{
if(solverConstraintByteSize)
{
solverConstraint = constraintBlock;
PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f));
}
}
//Return true if neither of the two block reservations failed.
return ((0==constraintBlockByteSize || constraintBlock) && (0==frictionPatchByteSize || frictionPatches));
}
bool createFinalizeSolverContacts(
PxSolverContactDesc& contactDesc,
CorrelationBuffer& c,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
Cm::SpatialVectorF* Z)
{
Ps::prefetchLine(contactDesc.body0);
Ps::prefetchLine(contactDesc.body1);
Ps::prefetchLine(contactDesc.data0);
Ps::prefetchLine(contactDesc.data1);
c.frictionPatchCount = 0;
c.contactPatchCount = 0;
const bool hasForceThreshold = contactDesc.hasForceThresholds;
const bool staticOrKinematicBody = contactDesc.bodyState1 == PxSolverContactDesc::eKINEMATIC_BODY || contactDesc.bodyState1 == PxSolverContactDesc::eSTATIC_BODY;
const bool disableStrongFriction = contactDesc.disableStrongFriction;
const bool useExtContacts = ((contactDesc.bodyState0 | contactDesc.bodyState1) & PxSolverContactDesc::eARTICULATION) != 0;
PxSolverConstraintDesc& desc = *contactDesc.desc;
desc.constraintLengthOver16 = 0;
if (contactDesc.numContacts == 0)
{
contactDesc.frictionPtr = NULL;
contactDesc.frictionCount = 0;
desc.constraint = NULL;
return true;
}
if (!disableStrongFriction)
{
getFrictionPatches(c, contactDesc.frictionPtr, contactDesc.frictionCount, contactDesc.bodyFrame0, contactDesc.bodyFrame1, correlationDistance);
}
bool overflow = !createContactPatches(c, contactDesc.contacts, contactDesc.numContacts, PXC_SAME_NORMAL);
overflow = correlatePatches(c, contactDesc.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, PXC_SAME_NORMAL, 0, 0) || overflow;
PX_UNUSED(overflow);
#if PX_CHECKED
if (overflow)
{
Ps::getFoundation().error(physx::PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__,
"Dropping contacts in solver because we exceeded limit of 32 friction patches.");
}
#endif
growPatches(c, contactDesc.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, correlationDistance, 0, frictionOffsetThreshold + contactDesc.restDistance);
//PX_ASSERT(patchCount == c.frictionPatchCount);
FrictionPatch* frictionPatches = NULL;
PxU8* solverConstraint = NULL;
PxU32 numFrictionPatches = 0;
PxU32 solverConstraintByteSize = 0;
PxU32 axisConstraintCount = 0;
const bool successfulReserve = reserveBlockStreams(
useExtContacts, c,
solverConstraint, frictionPatches,
numFrictionPatches,
solverConstraintByteSize,
axisConstraintCount,
constraintAllocator);
// initialise the work unit's ptrs to the various buffers.
contactDesc.frictionPtr = NULL;
contactDesc.frictionCount = 0;
desc.constraint = NULL;
desc.constraintLengthOver16 = 0;
// patch up the work unit with the reserved buffers and set the reserved buffer data as appropriate.
if (successfulReserve)
{
PxU8* frictionDataPtr = reinterpret_cast<PxU8*>(frictionPatches);
contactDesc.frictionPtr = frictionDataPtr;
desc.constraint = solverConstraint;
//output.nbContacts = Ps::to8(numContacts);
contactDesc.frictionCount = Ps::to8(numFrictionPatches);
desc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize / 16);
desc.writeBack = contactDesc.contactForces;
desc.writeBackLengthOver4 = PxU16(contactDesc.contactForces ? contactDesc.numContacts : 0);
//Initialise friction buffer.
if (frictionPatches)
{
// PT: TODO: revisit this... not very satisfying
//const PxU32 maxSize = numFrictionPatches*sizeof(FrictionPatch);
Ps::prefetchLine(frictionPatches);
Ps::prefetchLine(frictionPatches, 128);
Ps::prefetchLine(frictionPatches, 256);
for (PxU32 i = 0; i<c.frictionPatchCount; i++)
{
//if(c.correlationListHeads[i]!=CorrelationBuffer::LIST_END)
if (c.frictionPatchContactCounts[i])
{
*frictionPatches++ = c.frictionPatches[i];
Ps::prefetchLine(frictionPatches, 256);
}
}
}
//Initialise solverConstraint buffer.
if (solverConstraint)
{
const PxSolverBodyData& data0 = *contactDesc.data0;
const PxSolverBodyData& data1 = *contactDesc.data1;
if (useExtContacts)
{
const SolverExtBody b0(reinterpret_cast<const void*>(contactDesc.body0), reinterpret_cast<const void*>(&data0), desc.linkIndexA);
const SolverExtBody b1(reinterpret_cast<const void*>(contactDesc.body1), reinterpret_cast<const void*>(&data1), desc.linkIndexB);
setupFinalizeExtSolverContacts(contactDesc.contacts, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
b0, b1, invDtF32, bounceThresholdF32,
contactDesc.invMassScales.linear0, contactDesc.invMassScales.angular0, contactDesc.invMassScales.linear1, contactDesc.invMassScales.angular1,
contactDesc.restDistance, frictionDataPtr, contactDesc.maxCCDSeparation, Z);
}
else
{
setupFinalizeSolverConstraints(contactDesc.shapeInteraction, contactDesc.contacts, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
data0, data1, invDtF32, bounceThresholdF32,
contactDesc.invMassScales.linear0, contactDesc.invMassScales.angular0, contactDesc.invMassScales.linear1, contactDesc.invMassScales.angular1,
hasForceThreshold, staticOrKinematicBody, contactDesc.restDistance, frictionDataPtr, contactDesc.maxCCDSeparation, solverOffsetSlop);
}
//KS - set to 0 so we have a counter for the number of times we solved the constraint
//only going to be used on SPU but might as well set on all platforms because this code is shared
*(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0;
}
}
return successfulReserve;
}
FloatV setupExtSolverContact(const SolverExtBody& b0, const SolverExtBody& b1,
const FloatV& d0, const FloatV& d1, const FloatV& angD0, const FloatV& angD1, const Vec3V& bodyFrame0p, const Vec3V& bodyFrame1p,
const Vec3VArg normal, const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution,
const FloatVArg bounceThreshold, const Gu::ContactPoint& contact, SolverContactPointExt& solverContact, const FloatVArg ccdMaxSeparation, Cm::SpatialVectorF* zVector,
const Cm::SpatialVectorV& v0, const Cm::SpatialVectorV& v1)
{
const FloatV zero = FZero();
const FloatV separation = FLoad(contact.separation);
const FloatV penetration = FSub(separation, restDistance);
const Vec3V point = V3LoadA(contact.point);
const Vec3V ra = V3Sub(point, bodyFrame0p);
const Vec3V rb = V3Sub(point, bodyFrame1p);
const Vec3V raXn = V3Cross(ra, normal);
const Vec3V rbXn = V3Cross(rb, normal);
Cm::SpatialVectorV deltaV0, deltaV1;
const Cm::SpatialVectorV resp0 = createImpulseResponseVector(normal, raXn, b0);
const Cm::SpatialVectorV resp1 = createImpulseResponseVector(V3Neg(normal), V3Neg(rbXn), b1);
const FloatV unitResponse = getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
b1, resp1, deltaV1, d1, angD1, reinterpret_cast<Cm::SpatialVectorV*>(zVector));
const Vec3V vel0 = V3Add(V3Mul(v0.linear, normal), V3Mul(v0.angular, raXn));
const Vec3V vel1 = V3Add(V3Mul(v1.linear, normal), V3Mul(v1.angular, rbXn));
const FloatV vrel = V3SumElems(V3Sub(vel0, vel1));
FloatV velMultiplier = FSel(FIsGrtr(FLoad(DY_ARTICULATION_MIN_RESPONSE), unitResponse), zero, FRecip(FAdd(unitResponse, FLoad(DY_ARTICULATION_CFM))));
//FloatV velMultiplier = FSel(FIsGrtr(FEps(), unitResponse), zero, FRecip(unitResponse));
FloatV scaledBias = FMul(velMultiplier, FMax(maxPenBias, FMul(penetration, invDtp8)));
const FloatV penetrationInvDt = FMul(penetration, invDt);
const BoolV isGreater2 = BAnd(BAnd(FIsGrtr(restitution, zero), FIsGrtr(bounceThreshold, vrel)), FIsGrtr(FNeg(vrel), penetrationInvDt));
const BoolV ccdSeparationCondition = FIsGrtrOrEq(ccdMaxSeparation, penetration);
scaledBias = FSel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias);
FloatV targetVelocity = FSel(isGreater2, FMul(FNeg(vrel), restitution), zero);
//Get the rigid body's current velocity and embed into the constraint target velocities
if (b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
targetVelocity = FSub(targetVelocity, V3SumElems(vel0));
else if (b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
targetVelocity = FAdd(targetVelocity, V3SumElems(vel1));
targetVelocity = FAdd(targetVelocity, V3Dot(V3LoadA(contact.targetVel), normal));
const FloatV biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias));
const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FSel(isGreater2, zero, FNeg(FMax(scaledBias, zero))));
const FloatV deltaF = FMax(FNegScaleSub(vrel, velMultiplier, biasedErr), zero);
FStore(velMultiplier, &solverContact.velMultiplier);
FStore(biasedErr, &solverContact.biasedErr);
FStore(unbiasedErr, &solverContact.unbiasedErr);
solverContact.maxImpulse = contact.maxImpulse;
solverContact.raXn = resp0.angular;
solverContact.rbXn = V3Neg(resp1.angular);
solverContact.linDeltaVA = deltaV0.linear;
solverContact.angDeltaVA = deltaV0.angular;
solverContact.linDeltaVB = deltaV1.linear;
solverContact.angDeltaVB = deltaV1.angular;
return deltaF;
}
bool createFinalizeSolverContacts(PxSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
Cm::SpatialVectorF* Z)
{
ContactBuffer& buffer = threadContext.mContactBuffer;
buffer.count = 0;
// We pull the friction patches out of the cache to remove the dependency on how
// the cache is organized. Remember original addrs so we can write them back
// efficiently.
PxU32 numContacts = 0;
{
PxReal invMassScale0 = 1.f;
PxReal invMassScale1 = 1.f;
PxReal invInertiaScale0 = 1.f;
PxReal invInertiaScale1 = 1.f;
bool hasMaxImpulse = false, hasTargetVelocity = false;
numContacts = extractContacts(buffer, output, hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1,
invInertiaScale0, invInertiaScale1, PxMin(contactDesc.data0->maxContactImpulse, contactDesc.data1->maxContactImpulse));
contactDesc.contacts = buffer.contacts;
contactDesc.numContacts = numContacts;
contactDesc.disableStrongFriction = contactDesc.disableStrongFriction || hasTargetVelocity;
contactDesc.hasMaxImpulse = hasMaxImpulse;
contactDesc.invMassScales.linear0 *= invMassScale0;
contactDesc.invMassScales.linear1 *= invMassScale1;
contactDesc.invMassScales.angular0 *= invInertiaScale0;
contactDesc.invMassScales.angular1 *= invInertiaScale1;
}
CorrelationBuffer& c = threadContext.mCorrelationBuffer;
return createFinalizeSolverContacts(contactDesc, c, invDtF32, bounceThresholdF32, frictionOffsetThreshold,
correlationDistance, solverOffsetSlop, constraintAllocator, Z);
}
PxU32 getContactManagerConstraintDesc(const PxsContactManagerOutput& cmOutput, const PxsContactManager& /*cm*/, PxSolverConstraintDesc& desc)
{
desc.writeBackLengthOver4 = cmOutput.nbContacts;
desc.writeBack = cmOutput.contactForces;
return cmOutput.nbContacts;// cm.getWorkUnit().axisConstraintCount;
}
}
}

View File

@ -0,0 +1,213 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_CONTACTPREP_H
#define DY_CONTACTPREP_H
#include "DySolverConstraintDesc.h"
#include "PxSceneDesc.h"
#include "DySolverContact4.h"
namespace physx
{
struct PxcNpWorkUnit;
class PxsConstraintBlockManager;
class PxcConstraintBlockStream;
struct PxsContactManagerOutput;
class FrictionPatchStreamPair;
struct PxSolverBody;
struct PxSolverBodyData;
struct PxSolverConstraintDesc;
class PxsContactManager;
namespace Dy
{
class ThreadContext;
struct CorrelationBuffer;
#define CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS \
PxSolverContactDesc& contactDesc, \
PxsContactManagerOutput& output, \
ThreadContext& threadContext, \
const PxReal invDtF32, \
PxReal bounceThresholdF32, \
PxReal frictionOffsetThreshold, \
PxReal correlationDistance, \
PxReal solverOffsetSlop, \
PxConstraintAllocator& constraintAllocator, \
Cm::SpatialVectorF* Z
#define CREATE_FINALIZE_SOVLER_CONTACT_METHOD_ARGS_4 \
PxsContactManagerOutput** outputs, \
ThreadContext& threadContext, \
PxSolverContactDesc* blockDescs, \
const PxReal invDtF32, \
PxReal bounceThresholdF32, \
PxReal frictionThresholdF32, \
PxReal correlationDistanceF32, \
PxReal solverOffsetSlopF32, \
PxConstraintAllocator& constraintAllocator
/*!
Method prototype for create finalize solver contact
*/
typedef bool (*PxcCreateFinalizeSolverContactMethod)(CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS);
extern PxcCreateFinalizeSolverContactMethod createFinalizeMethods[3];
typedef SolverConstraintPrepState::Enum (*PxcCreateFinalizeSolverContactMethod4)(CREATE_FINALIZE_SOVLER_CONTACT_METHOD_ARGS_4);
extern PxcCreateFinalizeSolverContactMethod4 createFinalizeMethods4[3];
bool createFinalizeSolverContacts( PxSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
Cm::SpatialVectorF* Z);
bool createFinalizeSolverContacts( PxSolverContactDesc& contactDesc,
CorrelationBuffer& c,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
Cm::SpatialVectorF* Z);
SolverConstraintPrepState::Enum createFinalizeSolverContacts4( PxsContactManagerOutput** outputs,
ThreadContext& threadContext,
PxSolverContactDesc* blockDescs,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator);
SolverConstraintPrepState::Enum createFinalizeSolverContacts4( Dy::CorrelationBuffer& c,
PxSolverContactDesc* blockDescs,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator);
bool createFinalizeSolverContactsCoulomb1D(PxSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
Cm::SpatialVectorF* Z);
bool createFinalizeSolverContactsCoulomb2D(PxSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
Cm::SpatialVectorF* Z);
SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb1D( PxsContactManagerOutput** outputs,
ThreadContext& threadContext,
PxSolverContactDesc* blockDescs,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator);
SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb2D(PxsContactManagerOutput** outputs,
ThreadContext& threadContext,
PxSolverContactDesc* blockDescs,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator);
PxU32 getContactManagerConstraintDesc(const PxsContactManagerOutput& cmOutput, const PxsContactManager& cm, PxSolverConstraintDesc& desc);
class BlockAllocator : public PxConstraintAllocator
{
PxsConstraintBlockManager& mConstraintBlockManager;
PxcConstraintBlockStream& mConstraintBlockStream;
FrictionPatchStreamPair& mFrictionPatchStreamPair;
PxU32& mTotalConstraintByteSize;
public:
BlockAllocator(PxsConstraintBlockManager& constraintBlockManager, PxcConstraintBlockStream& constraintBlockStream, FrictionPatchStreamPair& frictionPatchStreamPair,
PxU32& totalConstraintByteSize) :
mConstraintBlockManager(constraintBlockManager), mConstraintBlockStream(constraintBlockStream), mFrictionPatchStreamPair(frictionPatchStreamPair),
mTotalConstraintByteSize(totalConstraintByteSize)
{
}
virtual PxU8* reserveConstraintData(const PxU32 size);
virtual PxU8* reserveFrictionData(const PxU32 size);
virtual PxU8* findInputPatches(PxU8* frictionCookie)
{
return frictionCookie;
}
PX_NOCOPY(BlockAllocator)
};
}
}
#endif //DY_CONTACTPREP_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,660 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"
#include "PsMathUtils.h"
#include "DySolverContact.h"
#include "DySolverContactPF.h"
#include "DySolverConstraintTypes.h"
#include "PxcNpWorkUnit.h"
#include "DyThreadContext.h"
#include "DyContactPrep.h"
#include "PxcNpContactPrepShared.h"
//#include "PxvGeometry.h"
#include "PxvDynamics.h"
#include "DyCorrelationBuffer.h"
#include "DySolverConstraintDesc.h"
#include "DySolverBody.h"
#include "DySolverContact4.h"
#include "DySolverContactPF4.h"
#include "PsVecMath.h"
#include "PxContactModifyCallback.h"
#include "PxsMaterialManager.h"
#include "PxsMaterialCombiner.h"
#include "DySolverExt.h"
#include "DyArticulationContactPrep.h"
#include "DyContactPrepShared.h"
#include "PsFoundation.h"
using namespace physx::Gu;
using namespace physx::shdfnd::aos;
namespace physx
{
namespace Dy
{
bool createFinalizeSolverContactsCoulomb(PxSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
PxFrictionType::Enum frictionType,
Cm::SpatialVectorF* Z);
static bool setupFinalizeSolverConstraintsCoulomb(
Sc::ShapeInteraction* shapeInteraction,
const ContactBuffer& buffer,
const CorrelationBuffer& c,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxU8* workspace,
const PxSolverBodyData& data0,
const PxSolverBodyData& data1,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxU32 frictionPerPointCount,
const bool hasForceThresholds,
const bool staticBody,
PxReal invMassScale0, PxReal invInertiaScale0,
PxReal invMassScale1, PxReal invInertiaScale1,
PxReal restDist,
const PxReal maxCCDSeparation,
const PxReal solverOffsetSlopF32)
{
const FloatV ccdMaxSeparation = FLoad(maxCCDSeparation);
const Vec3V solverOffsetSlop = V3Load(solverOffsetSlopF32);
PxU8* PX_RESTRICT ptr = workspace;
const FloatV zero=FZero();
PxU8 flags = PxU8(hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0);
const FloatV restDistance = FLoad(restDist);
const Vec3V bodyFrame0p = V3LoadU(bodyFrame0.p);
const Vec3V bodyFrame1p = V3LoadU(bodyFrame1.p);
Ps::prefetchLine(c.contactID);
Ps::prefetchLine(c.contactID, 128);
const PxU32 frictionPatchCount = c.frictionPatchCount;
const PxU32 pointStride = sizeof(SolverContactPoint);
const PxU32 frictionStride = sizeof(SolverContactFriction);
const PxU8 pointHeaderType = Ps::to8(staticBody ? DY_SC_TYPE_STATIC_CONTACT : DY_SC_TYPE_RB_CONTACT);
const PxU8 frictionHeaderType = Ps::to8(staticBody ? DY_SC_TYPE_STATIC_FRICTION : DY_SC_TYPE_FRICTION);
const Vec3V linVel0 = V3LoadU(data0.linearVelocity);
const Vec3V linVel1 = V3LoadU(data1.linearVelocity);
const Vec3V angVel0 = V3LoadU(data0.angularVelocity);
const Vec3V angVel1 = V3LoadU(data1.angularVelocity);
const FloatV invMass0 = FLoad(data0.invMass);
const FloatV invMass1 = FLoad(data1.invMass);
const FloatV maxPenBias = FMax(FLoad(data0.penBiasClamp), FLoad(data1.penBiasClamp));
// PT: the matrix is symmetric so we can read it as a PxMat33! Gets rid of 25000+ LHS.
const PxMat33& invIn0 = reinterpret_cast<const PxMat33&>(data0.sqrtInvInertia);
PX_ALIGN(16, const Mat33V invSqrtInertia0)
(
V3LoadU(invIn0.column0),
V3LoadU(invIn0.column1),
V3LoadU(invIn0.column2)
);
const PxMat33& invIn1 = reinterpret_cast<const PxMat33&>(data1.sqrtInvInertia);
PX_ALIGN(16, const Mat33V invSqrtInertia1)
(
V3LoadU(invIn1.column0),
V3LoadU(invIn1.column1),
V3LoadU(invIn1.column2)
);
const FloatV invDt = FLoad(invDtF32);
const FloatV p8 = FLoad(0.8f);
const FloatV bounceThreshold = FLoad(bounceThresholdF32);
const FloatV orthoThreshold = FLoad(0.70710678f);
const FloatV eps = FLoad(0.00001f);
const FloatV invDtp8 = FMul(invDt, p8);
const FloatV d0 = FLoad(invMassScale0);
const FloatV d1 = FLoad(invMassScale1);
const FloatV nDom1fV = FNeg(d1);
const FloatV angD0 = FLoad(invInertiaScale0);
const FloatV angD1 = FLoad(invInertiaScale1);
const FloatV invMass0_dom0fV = FMul(d0, invMass0);
const FloatV invMass1_dom1fV = FMul(nDom1fV, invMass1);
for(PxU32 i=0;i< frictionPatchCount;i++)
{
const PxU32 contactCount = c.frictionPatchContactCounts[i];
if(contactCount == 0)
continue;
const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;
const Vec3V normal = Ps::aos::V3LoadA(contactBase0->normal);
const FloatV normalLenSq = V3LengthSq(normal);
const VecCrossV norCross = V3PrepareCross(normal);
const FloatV restitution = FLoad(contactBase0->restitution);
const FloatV norVel = V3SumElems(V3NegMulSub(normal, linVel1, V3Mul(normal, linVel0)));
/*const FloatV norVel0 = V3Dot(normal, linVel0);
const FloatV norVel1 = V3Dot(normal, linVel1);
const FloatV norVel = FSub(norVel0, norVel1);*/
const FloatV invMassNorLenSq0 = FMul(invMass0_dom0fV, normalLenSq);
const FloatV invMassNorLenSq1 = FMul(invMass1_dom1fV, normalLenSq);
SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(ptr);
ptr += sizeof(SolverContactCoulombHeader);
Ps::prefetchLine(ptr, 128);
Ps::prefetchLine(ptr, 256);
Ps::prefetchLine(ptr, 384);
header->numNormalConstr = PxU8(contactCount);
header->type = pointHeaderType;
//header->setRestitution(n.restitution);
//header->setRestitution(contactBase0->restitution);
header->setDominance0(invMass0_dom0fV);
header->setDominance1(FNeg(invMass1_dom1fV));
FStore(angD0, &header->angDom0);
FStore(angD1, &header->angDom1);
header->setNormal(normal);
header->flags = flags;
header->shapeInteraction = shapeInteraction;
for(PxU32 patch=c.correlationListHeads[i];
patch!=CorrelationBuffer::LIST_END;
patch = c.contactPatches[patch].next)
{
const PxU32 count = c.contactPatches[patch].count;
const Gu::ContactPoint* contactBase = buffer.contacts + c.contactPatches[patch].start;
PxU8* p = ptr;
for(PxU32 j=0;j<count;j++)
{
const Gu::ContactPoint& contact = contactBase[j];
SolverContactPoint* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPoint*>(p);
p += pointStride;
constructContactConstraint(invSqrtInertia0, invSqrtInertia1, invMassNorLenSq0,
invMassNorLenSq1, angD0, angD1, bodyFrame0p, bodyFrame1p,
normal, norVel, norCross, angVel0, angVel1,
invDt, invDtp8, restDistance, maxPenBias, restitution,
bounceThreshold, contact, *solverContact, ccdMaxSeparation, solverOffsetSlop);
}
ptr = p;
}
}
//construct all the frictions
PxU8* PX_RESTRICT ptr2 = workspace;
bool hasFriction = false;
for(PxU32 i=0;i< frictionPatchCount;i++)
{
const PxU32 contactCount = c.frictionPatchContactCounts[i];
if(contactCount == 0)
continue;
const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;
SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(ptr2);
header->frictionOffset = PxU16(ptr - ptr2);// + sizeof(SolverFrictionHeader);
ptr2 += sizeof(SolverContactCoulombHeader) + header->numNormalConstr * pointStride;
const PxReal staticFriction = contactBase0->staticFriction;
const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
const bool haveFriction = (disableStrongFriction == 0);
SolverFrictionHeader* frictionHeader = reinterpret_cast<SolverFrictionHeader*>(ptr);
frictionHeader->numNormalConstr = Ps::to8(c.frictionPatchContactCounts[i]);
frictionHeader->numFrictionConstr = Ps::to8(haveFriction ? c.frictionPatchContactCounts[i] * frictionPerPointCount : 0);
ptr += sizeof(SolverFrictionHeader);
PxF32* appliedForceBuffer = reinterpret_cast<PxF32*>(ptr);
ptr += frictionHeader->getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]);
PxMemZero(appliedForceBuffer, sizeof(PxF32)*contactCount*frictionPerPointCount);
Ps::prefetchLine(ptr, 128);
Ps::prefetchLine(ptr, 256);
Ps::prefetchLine(ptr, 384);
const Vec3V normal = V3LoadU(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
const FloatV normalX = V3GetX(normal);
const FloatV normalY = V3GetY(normal);
const FloatV normalZ = V3GetZ(normal);
const Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY);
const Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero) ;
const BoolV con = FIsGrtr(orthoThreshold, FAbs(normalX));
const Vec3V tFallback1 = V3Sel(con, t0Fallback1, t0Fallback2);
const Vec3V linVrel = V3Sub(linVel0, linVel1);
const Vec3V t0_ = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel)));
const FloatV sqDist = V3Dot(t0_,t0_);
const BoolV con1 = FIsGrtr(sqDist, eps);
const Vec3V tDir0 =V3Normalize(V3Sel(con1, t0_, tFallback1));
const Vec3V tDir1 = V3Cross(tDir0, normal);
Vec3V tFallback = tDir0;
Vec3V tFallbackAlt = tDir1;
if(haveFriction)
{
//frictionHeader->setStaticFriction(n.staticFriction);
frictionHeader->setStaticFriction(staticFriction);
FStore(invMass0_dom0fV, &frictionHeader->invMass0D0);
FStore(FNeg(invMass1_dom1fV), &frictionHeader->invMass1D1);
FStore(angD0, &frictionHeader->angDom0);
FStore(angD1, &frictionHeader->angDom1);
frictionHeader->type = frictionHeaderType;
PxU32 totalPatchContactCount = 0;
for(PxU32 patch=c.correlationListHeads[i];
patch!=CorrelationBuffer::LIST_END;
patch = c.contactPatches[patch].next)
{
const PxU32 count = c.contactPatches[patch].count;
const PxU32 start = c.contactPatches[patch].start;
const Gu::ContactPoint* contactBase = buffer.contacts + start;
PxU8* p = ptr;
for(PxU32 j =0; j < count; j++)
{
hasFriction = true;
const Gu::ContactPoint& contact = contactBase[j];
const Vec3V point = V3LoadU(contact.point);
Vec3V ra = V3Sub(point, bodyFrame0p);
Vec3V rb = V3Sub(point, bodyFrame1p);
ra = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(ra)), V3Zero(), ra);
rb = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rb)), V3Zero(), rb);
const Vec3V targetVel = V3LoadU(contact.targetVel);
for(PxU32 k = 0; k < frictionPerPointCount; ++k)
{
const Vec3V t0 = tFallback;
tFallback = tFallbackAlt;
tFallbackAlt = t0;
SolverContactFriction* PX_RESTRICT f0 = reinterpret_cast<SolverContactFriction*>(p);
p += frictionStride;
//f0->brokenOrContactIndex = contactId;
const Vec3V raXn = V3Cross(ra, t0);
const Vec3V rbXn = V3Cross(rb, t0);
const Vec3V delAngVel0 = M33MulV3(invSqrtInertia0, raXn);
const Vec3V delAngVel1 = M33MulV3(invSqrtInertia1, rbXn);
const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(delAngVel0, delAngVel0)));
const FloatV resp1 = FSub(FMul(angD1, V3Dot(delAngVel1, delAngVel1)), invMass1_dom1fV);
const FloatV resp = FAdd(resp0, resp1);
const FloatV velMultiplier = FNeg(FSel(FIsGrtr(resp, zero), FRecip(resp), zero));
const FloatV vrel1 = FAdd(V3Dot(t0, linVel0), V3Dot(raXn, angVel0));
const FloatV vrel2 = FAdd(V3Dot(t0, linVel1), V3Dot(rbXn, angVel1));
const FloatV vrel = FSub(vrel1, vrel2);
f0->normalXYZ_appliedForceW = V4SetW(Vec4V_From_Vec3V(t0), zero);
f0->raXnXYZ_velMultiplierW = V4SetW(Vec4V_From_Vec3V(delAngVel0), velMultiplier);
//f0->rbXnXYZ_targetVelocityW = V4SetW(Vec4V_From_Vec3V(delAngVel1), FSub(V3Dot(targetVel, t0), vrel));
f0->rbXnXYZ_biasW = Vec4V_From_Vec3V(delAngVel1);
FStore(FSub(V3Dot(targetVel, t0), vrel), &f0->targetVel);
}
}
totalPatchContactCount += c.contactPatches[patch].count;
ptr = p;
}
}
}
*ptr = 0;
return hasFriction;
}
static void computeBlockStreamByteSizesCoulomb(const CorrelationBuffer& c,
const PxU32 frictionCountPerPoint, PxU32& _solverConstraintByteSize,
PxU32& _axisConstraintCount,
bool useExtContacts)
{
PX_ASSERT(0 == _solverConstraintByteSize);
PX_ASSERT(0 == _axisConstraintCount);
// PT: use local vars to remove LHS
PxU32 solverConstraintByteSize = 0;
PxU32 numFrictionPatches = 0;
PxU32 axisConstraintCount = 0;
for(PxU32 i = 0; i < c.frictionPatchCount; i++)
{
//Friction patches.
if(c.correlationListHeads[i] != CorrelationBuffer::LIST_END)
numFrictionPatches++;
const FrictionPatch& frictionPatch = c.frictionPatches[i];
const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0;
//Solver constraint data.
if(c.frictionPatchContactCounts[i]!=0)
{
solverConstraintByteSize += sizeof(SolverContactCoulombHeader);
solverConstraintByteSize += useExtContacts ? c.frictionPatchContactCounts[i] * sizeof(SolverContactPointExt)
: c.frictionPatchContactCounts[i] * sizeof(SolverContactPoint);
axisConstraintCount += c.frictionPatchContactCounts[i];
//We always need the friction headers to write the accumulated
if(haveFriction)
{
//4 bytes
solverConstraintByteSize += sizeof(SolverFrictionHeader);
//buffer to store applied forces in
solverConstraintByteSize += SolverFrictionHeader::getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]);
const PxU32 nbFrictionConstraints = c.frictionPatchContactCounts[i] * frictionCountPerPoint;
solverConstraintByteSize += useExtContacts ? nbFrictionConstraints * sizeof(SolverContactFrictionExt)
: nbFrictionConstraints * sizeof(SolverContactFriction);
axisConstraintCount += c.frictionPatchContactCounts[i];
}
else
{
//reserve buffers for storing accumulated impulses
solverConstraintByteSize += sizeof(SolverFrictionHeader);
solverConstraintByteSize += SolverFrictionHeader::getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]);
}
}
}
_axisConstraintCount = axisConstraintCount;
//16-byte alignment.
_solverConstraintByteSize = ((solverConstraintByteSize + 0x0f) & ~0x0f);
PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
}
static bool reserveBlockStreamsCoulomb(const CorrelationBuffer& c,
PxU8*& solverConstraint, PxU32 frictionCountPerPoint,
PxU32& solverConstraintByteSize,
PxU32& axisConstraintCount, PxConstraintAllocator& constraintAllocator,
bool useExtContacts)
{
PX_ASSERT(NULL == solverConstraint);
PX_ASSERT(0 == solverConstraintByteSize);
PX_ASSERT(0 == axisConstraintCount);
//From constraintBlockStream we need to reserve contact points, contact forces, and a char buffer for the solver constraint data (already have a variable for this).
//From frictionPatchStream we just need to reserve a single buffer.
//Compute the sizes of all the buffers.
computeBlockStreamByteSizesCoulomb(
c,
frictionCountPerPoint, solverConstraintByteSize,
axisConstraintCount, useExtContacts);
//Reserve the buffers.
//First reserve the accumulated buffer size for the constraint block.
PxU8* constraintBlock = NULL;
const PxU32 constraintBlockByteSize = solverConstraintByteSize;
if(constraintBlockByteSize > 0)
{
constraintBlock = constraintAllocator.reserveConstraintData(constraintBlockByteSize + 16u);
if(0==constraintBlock || (reinterpret_cast<PxU8*>(-1))==constraintBlock)
{
if(0==constraintBlock)
{
PX_WARN_ONCE(
"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
"Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
}
else
{
PX_WARN_ONCE(
"Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. "
"Either accept dropped contacts or simplify collision geometry.");
constraintBlock=NULL;
}
}
}
//Patch up the individual ptrs to the buffer returned by the constraint block reservation (assuming the reservation didn't fail).
if(0==constraintBlockByteSize || constraintBlock)
{
if(solverConstraintByteSize)
{
solverConstraint = constraintBlock;
PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f));
}
}
//Return true if neither of the two block reservations failed.
return ((0==constraintBlockByteSize || constraintBlock));
}
bool createFinalizeSolverContactsCoulomb1D(PxSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
Cm::SpatialVectorF* Z)
{
return createFinalizeSolverContactsCoulomb(contactDesc, output, threadContext, invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance, solverOffsetSlop,
constraintAllocator, PxFrictionType::eONE_DIRECTIONAL, Z);
}
bool createFinalizeSolverContactsCoulomb2D(PxSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
Cm::SpatialVectorF* Z)
{
return createFinalizeSolverContactsCoulomb(contactDesc, output, threadContext, invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance, solverOffsetSlop,
constraintAllocator, PxFrictionType::eTWO_DIRECTIONAL, Z);
}
bool createFinalizeSolverContactsCoulomb(PxSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator,
PxFrictionType::Enum frictionType,
Cm::SpatialVectorF* Z)
{
PX_UNUSED(frictionOffsetThreshold);
PX_UNUSED(correlationDistance);
PxSolverConstraintDesc& desc = *contactDesc.desc;
desc.constraintLengthOver16 = 0;
ContactBuffer& buffer = threadContext.mContactBuffer;
buffer.count = 0;
// We pull the friction patches out of the cache to remove the dependency on how
// the cache is organized. Remember original addrs so we can write them back
// efficiently.
Ps::prefetchLine(contactDesc.frictionPtr);
PxReal invMassScale0 = 1.f;
PxReal invMassScale1 = 1.f;
PxReal invInertiaScale0 = 1.f;
PxReal invInertiaScale1 = 1.f;
bool hasMaxImpulse = false, hasTargetVelocity = false;
PxU32 numContacts = extractContacts(buffer, output, hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1,
invInertiaScale0, invInertiaScale1, PxMin(contactDesc.data0->maxContactImpulse, contactDesc.data1->maxContactImpulse));
if(numContacts == 0)
{
contactDesc.frictionPtr = NULL;
contactDesc.frictionCount = 0;
return true;
}
Ps::prefetchLine(contactDesc.body0);
Ps::prefetchLine(contactDesc.body1);
Ps::prefetchLine(contactDesc.data0);
Ps::prefetchLine(contactDesc.data1);
CorrelationBuffer& c = threadContext.mCorrelationBuffer;
c.frictionPatchCount = 0;
c.contactPatchCount = 0;
createContactPatches(c, buffer.contacts, buffer.count, PXC_SAME_NORMAL);
PxU32 numFrictionPerPatch = PxU32(frictionType == PxFrictionType::eONE_DIRECTIONAL ? 1 : 2);
bool overflow = correlatePatches(c, buffer.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, PXC_SAME_NORMAL, 0, 0);
PX_UNUSED(overflow);
#if PX_CHECKED
if(overflow)
{
Ps::getFoundation().error(physx::PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__,
"Dropping contacts in solver because we exceeded limit of 32 friction patches.");
}
#endif
//PX_ASSERT(patchCount == c.frictionPatchCount);
PxU8* solverConstraint = NULL;
PxU32 solverConstraintByteSize = 0;
PxU32 axisConstraintCount = 0;
const bool useExtContacts = !!((contactDesc.bodyState0 | contactDesc.bodyState1) & PxSolverContactDesc::eARTICULATION);
const bool successfulReserve = reserveBlockStreamsCoulomb(
c,
solverConstraint, numFrictionPerPatch,
solverConstraintByteSize,
axisConstraintCount,
constraintAllocator,
useExtContacts);
// initialise the work unit's ptrs to the various buffers.
contactDesc.frictionPtr = NULL;
desc.constraint = NULL;
desc.constraintLengthOver16 = 0;
contactDesc.frictionCount = 0;
// patch up the work unit with the reserved buffers and set the reserved buffer data as appropriate.
if(successfulReserve)
{
desc.constraint = solverConstraint;
output.nbContacts = Ps::to8(numContacts);
desc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize/16);
//Initialise solverConstraint buffer.
if(solverConstraint)
{
bool hasFriction = false;
const PxSolverBodyData& data0 = *contactDesc.data0;
const PxSolverBodyData& data1 = *contactDesc.data1;
if(useExtContacts)
{
const SolverExtBody b0(reinterpret_cast<const void*>(contactDesc.body0), reinterpret_cast<const void*>(&data0), desc.linkIndexA);
const SolverExtBody b1(reinterpret_cast<const void*>(contactDesc.body1), reinterpret_cast<const void*>(&data1), desc.linkIndexB);
hasFriction = setupFinalizeExtSolverContactsCoulomb(buffer, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
invDtF32, bounceThresholdF32, b0, b1, numFrictionPerPatch,
invMassScale0, invInertiaScale0, invMassScale1, invInertiaScale1, contactDesc.restDistance, contactDesc.maxCCDSeparation, Z);
}
else
{
hasFriction = setupFinalizeSolverConstraintsCoulomb(contactDesc.shapeInteraction, buffer, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
data0, data1, invDtF32, bounceThresholdF32, numFrictionPerPatch, contactDesc.hasForceThresholds, contactDesc.bodyState1 == PxSolverContactDesc::eSTATIC_BODY,
invMassScale0, invInertiaScale0, invMassScale1, invInertiaScale1, contactDesc.restDistance, contactDesc.maxCCDSeparation, solverOffsetSlop);
}
*(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0;
*(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize + 4)) = hasFriction ? 0xFFFFFFFF : 0;
}
}
return successfulReserve;
}
}
}

View File

@ -0,0 +1,308 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_CONTACT_PREP_SHARED_H
#define DY_CONTACT_PREP_SHARED_H
#include "foundation/PxPreprocessor.h"
#include "PxSceneDesc.h"
#include "PsVecMath.h"
#include "PsMathUtils.h"
#include "DyContactPrep.h"
#include "DyCorrelationBuffer.h"
#include "DyArticulationContactPrep.h"
#include "PxsContactManager.h"
#include "PxsContactManagerState.h"
namespace physx
{
namespace Dy
{
PX_FORCE_INLINE bool pointsAreClose(const PxTransform& body1ToBody0,
const PxVec3& localAnchor0, const PxVec3& localAnchor1,
const PxVec3& axis, float correlDist)
{
const PxVec3 body0PatchPoint1 = body1ToBody0.transform(localAnchor1);
return PxAbs((localAnchor0 - body0PatchPoint1).dot(axis))<correlDist;
}
PX_FORCE_INLINE bool isSeparated(const FrictionPatch& patch, const PxTransform& body1ToBody0, const PxReal correlationDistance)
{
PX_ASSERT(patch.anchorCount <= 2);
for(PxU32 a = 0; a < patch.anchorCount; ++a)
{
if(!pointsAreClose(body1ToBody0, patch.body0Anchors[a], patch.body1Anchors[a], patch.body0Normal, correlationDistance))
return true;
}
return false;
}
inline bool getFrictionPatches(CorrelationBuffer& c,
const PxU8* frictionCookie,
PxU32 frictionPatchCount,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxReal correlationDistance)
{
PX_UNUSED(correlationDistance);
if(frictionCookie == NULL || frictionPatchCount == 0)
return true;
//KS - this is now DMA'd inside the shader so we don't need to immediate DMA it here
const FrictionPatch* patches = reinterpret_cast<const FrictionPatch*>(frictionCookie);
//Try working out relative transforms! TODO - can we compute this lazily for the first friction patch
bool evaluated = false;
PxTransform body1ToBody0;
while(frictionPatchCount--)
{
Ps::prefetchLine(patches,128);
const FrictionPatch& patch = *patches++;
PX_ASSERT (patch.broken == 0 || patch.broken == 1);
if(!patch.broken)
{
// if the eDISABLE_STRONG_FRICTION flag is there we need to blow away the previous frame's friction correlation, so
// that we can associate each friction anchor with a target velocity. So we lose strong friction.
if(patch.anchorCount != 0 && !(patch.materialFlags & PxMaterialFlag::eDISABLE_STRONG_FRICTION))
{
PX_ASSERT(patch.anchorCount <= 2);
if(!evaluated)
{
body1ToBody0 = bodyFrame0.transformInv(bodyFrame1);
evaluated = true;
}
if(patch.body0Normal.dot(body1ToBody0.rotate(patch.body1Normal)) > PXC_SAME_NORMAL)
{
if(!isSeparated(patch, body1ToBody0, correlationDistance))
{
if(c.frictionPatchCount == CorrelationBuffer::MAX_FRICTION_PATCHES)
return false;
{
c.contactID[c.frictionPatchCount][0] = 0xffff;
c.contactID[c.frictionPatchCount][1] = 0xffff;
//Rotate the contact normal into world space
c.frictionPatchWorldNormal[c.frictionPatchCount] = bodyFrame0.rotate(patch.body0Normal);
c.frictionPatchContactCounts[c.frictionPatchCount] = 0;
c.patchBounds[c.frictionPatchCount].setEmpty();
c.correlationListHeads[c.frictionPatchCount] = CorrelationBuffer::LIST_END;
PxMemCopy(&c.frictionPatches[c.frictionPatchCount++], &patch, sizeof(FrictionPatch));
}
}
}
}
}
}
return true;
}
PX_FORCE_INLINE PxU32 extractContacts(Gu::ContactBuffer& buffer, PxsContactManagerOutput& npOutput, bool& hasMaxImpulse, bool& hasTargetVelocity,
PxReal& invMassScale0, PxReal& invMassScale1, PxReal& invInertiaScale0, PxReal& invInertiaScale1, PxReal defaultMaxImpulse)
{
PxContactStreamIterator iter(npOutput.contactPatches, npOutput.contactPoints, npOutput.getInternalFaceIndice(), npOutput.nbPatches, npOutput.nbContacts);
PxU32 numContacts = buffer.count, origContactCount = buffer.count;
if(!iter.forceNoResponse)
{
invMassScale0 = iter.getInvMassScale0();
invMassScale1 = iter.getInvMassScale1();
invInertiaScale0 = iter.getInvInertiaScale0();
invInertiaScale1 = iter.getInvInertiaScale1();
hasMaxImpulse = (iter.patch->internalFlags & PxContactPatch::eHAS_MAX_IMPULSE) != 0;
hasTargetVelocity = (iter.patch->internalFlags & PxContactPatch::eHAS_TARGET_VELOCITY) != 0;
while(iter.hasNextPatch())
{
iter.nextPatch();
while(iter.hasNextContact())
{
iter.nextContact();
Ps::prefetchLine(iter.contact, 128);
Ps::prefetchLine(&buffer.contacts[numContacts], 128);
PxReal maxImpulse = hasMaxImpulse ? iter.getMaxImpulse() : defaultMaxImpulse;
if(maxImpulse != 0.f)
{
PX_ASSERT(numContacts < Gu::ContactBuffer::MAX_CONTACTS);
buffer.contacts[numContacts].normal = iter.getContactNormal();
buffer.contacts[numContacts].point = iter.getContactPoint();
buffer.contacts[numContacts].separation = iter.getSeparation();
//KS - we use the face indices to cache the material indices and flags - avoids bloating the PxContact structure
buffer.contacts[numContacts].materialFlags = PxU8(iter.getMaterialFlags());
buffer.contacts[numContacts].maxImpulse = maxImpulse;
buffer.contacts[numContacts].staticFriction = iter.getStaticFriction();
buffer.contacts[numContacts].dynamicFriction = iter.getDynamicFriction();
buffer.contacts[numContacts].restitution = iter.getRestitution();
const PxVec3& targetVel = iter.getTargetVel();
buffer.contacts[numContacts].targetVel = targetVel;
++numContacts;
}
}
}
}
const PxU32 contactCount = numContacts - origContactCount;
buffer.count = numContacts;
return contactCount;
}
struct CorrelationListIterator
{
CorrelationBuffer& buffer;
PxU32 currPatch;
PxU32 currContact;
CorrelationListIterator(CorrelationBuffer& correlationBuffer, PxU32 startPatch) : buffer(correlationBuffer)
{
//We need to force us to advance the correlation buffer to the first available contact (if one exists)
PxU32 newPatch = startPatch, newContact = 0;
while(newPatch != CorrelationBuffer::LIST_END && newContact == buffer.contactPatches[newPatch].count)
{
newPatch = buffer.contactPatches[newPatch].next;
newContact = 0;
}
currPatch = newPatch;
currContact = newContact;
}
//Returns true if it has another contact pre-loaded. Returns false otherwise
PX_FORCE_INLINE bool hasNextContact()
{
return (currPatch != CorrelationBuffer::LIST_END && currContact < buffer.contactPatches[currPatch].count);
}
inline void nextContact(PxU32& patch, PxU32& contact)
{
PX_ASSERT(currPatch != CorrelationBuffer::LIST_END);
PX_ASSERT(currContact < buffer.contactPatches[currPatch].count);
patch = currPatch;
contact = currContact;
PxU32 newPatch = currPatch, newContact = currContact + 1;
while(newPatch != CorrelationBuffer::LIST_END && newContact == buffer.contactPatches[newPatch].count)
{
newPatch = buffer.contactPatches[newPatch].next;
newContact = 0;
}
currPatch = newPatch;
currContact = newContact;
}
private:
CorrelationListIterator& operator=(const CorrelationListIterator&);
};
PX_FORCE_INLINE void constructContactConstraint(const Mat33V& invSqrtInertia0, const Mat33V& invSqrtInertia1, const FloatVArg invMassNorLenSq0,
const FloatVArg invMassNorLenSq1, const FloatVArg angD0, const FloatVArg angD1, const Vec3VArg bodyFrame0p, const Vec3VArg bodyFrame1p,
const Vec3VArg normal, const FloatVArg norVel, const VecCrossV& norCross, const Vec3VArg angVel0, const Vec3VArg angVel1,
const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution,
const FloatVArg bounceThreshold, const Gu::ContactPoint& contact, SolverContactPoint& solverContact,
const FloatVArg ccdMaxSeparation, const Vec3VArg solverOffsetSlop)
{
const FloatV zero = FZero();
const Vec3V point = V3LoadA(contact.point);
const FloatV separation = FLoad(contact.separation);
const FloatV cTargetVel = V3Dot(normal, V3LoadA(contact.targetVel));
const Vec3V ra = V3Sub(point, bodyFrame0p);
const Vec3V rb = V3Sub(point, bodyFrame1p);
/*ra = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(ra)), V3Zero(), ra);
rb = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rb)), V3Zero(), rb);*/
Vec3V raXn = V3Cross(ra, norCross);
Vec3V rbXn = V3Cross(rb, norCross);
raXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(raXn)), V3Zero(), raXn);
rbXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rbXn)), V3Zero(), rbXn);
const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);
const FloatV resp0 = FAdd(invMassNorLenSq0, FMul(V3Dot(raXnSqrtInertia, raXnSqrtInertia), angD0));
const FloatV resp1 = FSub(FMul(V3Dot(rbXnSqrtInertia, rbXnSqrtInertia), angD1), invMassNorLenSq1);
const FloatV unitResponse = FAdd(resp0, resp1);
const FloatV vrel1 = FAdd(norVel, V3Dot(raXn, angVel0));
const FloatV vrel2 = V3Dot(rbXn, angVel1);
const FloatV vrel = FSub(vrel1, vrel2);
const FloatV velMultiplier = FSel(FIsGrtr(unitResponse, zero), FRecip(unitResponse), zero);
const FloatV penetration = FSub(separation, restDistance);
const FloatV penetrationInvDt = FMul(penetration, invDt);
const FloatV penetrationInvDtPt8 = FMax(maxPenBias, FMul(penetration, invDtp8));
FloatV scaledBias = FMul(velMultiplier, penetrationInvDtPt8);
const BoolV isGreater2 = BAnd(BAnd(FIsGrtr(restitution, zero), FIsGrtr(bounceThreshold, vrel)), FIsGrtr(FNeg(vrel), penetrationInvDt));
const BoolV ccdSeparationCondition = FIsGrtrOrEq(ccdMaxSeparation, penetration);
scaledBias = FSel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias);
const FloatV sumVRel(vrel);
FloatV targetVelocity = FAdd(cTargetVel, FSel(isGreater2, FMul(FNeg(sumVRel), restitution), zero));
//Note - we add on the initial target velocity
targetVelocity = FSub(targetVelocity, vrel);
const FloatV biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias));
const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FSel(isGreater2, zero, FNeg(FMax(scaledBias, zero))));
//const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(FMax(scaledBias, zero)));
FStore(velMultiplier, &solverContact.velMultiplier);
FStore(biasedErr, &solverContact.biasedErr);
FStore(unbiasedErr, &solverContact.unbiasedErr);
solverContact.maxImpulse = contact.maxImpulse;
solverContact.raXn = raXnSqrtInertia;
solverContact.rbXn = rbXnSqrtInertia;
}
}
}
#endif //DY_CONTACT_PREP_SHARED_H

View File

@ -0,0 +1,409 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_CONTACT_REDUCTION_H
#define DY_CONTACT_REDUCTION_H
#include "geomutils/GuContactPoint.h"
#include "PxsMaterialManager.h"
namespace physx
{
namespace Dy
{
//KS - might be OK with 4 but 5 guarantees the deepest + 4 contacts that contribute to largest surface area
#define CONTACT_REDUCTION_MAX_CONTACTS 6
#define CONTACT_REDUCTION_MAX_PATCHES 32
#define PXS_NORMAL_TOLERANCE 0.995f
#define PXS_SEPARATION_TOLERANCE 0.001f
//A patch contains a normal, pair of material indices and a list of indices. These indices are
//used to index into the PxContact array that's passed by the user
struct ReducedContactPatch
{
PxU32 numContactPoints;
PxU32 contactPoints[CONTACT_REDUCTION_MAX_CONTACTS];
};
struct ContactPatch
{
PxVec3 rootNormal;
ContactPatch* mNextPatch;
PxReal maxPenetration;
PxU16 startIndex;
PxU16 stride;
PxU16 rootIndex;
PxU16 index;
};
struct SortBoundsPredicateManifold
{
bool operator()(const ContactPatch* idx1, const ContactPatch* idx2) const
{
return idx1->maxPenetration < idx2->maxPenetration;
}
};
template <PxU32 MaxPatches>
class ContactReduction
{
public:
ReducedContactPatch mPatches[MaxPatches];
PxU32 mNumPatches;
ContactPatch mIntermediatePatches[CONTACT_REDUCTION_MAX_PATCHES];
ContactPatch* mIntermediatePatchesPtrs[CONTACT_REDUCTION_MAX_PATCHES];
PxU32 mNumIntermediatePatches;
Gu::ContactPoint* PX_RESTRICT mOriginalContacts;
PxsMaterialInfo* PX_RESTRICT mMaterialInfo;
PxU32 mNumOriginalContacts;
ContactReduction(Gu::ContactPoint* PX_RESTRICT originalContacts, PxsMaterialInfo* PX_RESTRICT materialInfo, PxU32 numContacts) :
mNumPatches(0), mNumIntermediatePatches(0), mOriginalContacts(originalContacts), mMaterialInfo(materialInfo), mNumOriginalContacts(numContacts)
{
}
void reduceContacts()
{
//First pass, break up into contact patches, storing the start and stride of the patches
//We will need to have contact patches and then coallesce them
mIntermediatePatches[0].rootNormal = mOriginalContacts[0].normal;
mIntermediatePatches[0].mNextPatch = NULL;
mIntermediatePatches[0].startIndex = 0;
mIntermediatePatches[0].rootIndex = 0;
mIntermediatePatches[0].maxPenetration = mOriginalContacts[0].separation;
mIntermediatePatches[0].index = 0;
PxU16 numPatches = 1;
//PxU32 startIndex = 0;
PxU32 numUniquePatches = 1;
PxU16 m = 1;
for(; m < mNumOriginalContacts; ++m)
{
PxI32 index = -1;
for(PxU32 b = numPatches; b > 0; --b)
{
ContactPatch& patch = mIntermediatePatches[b-1];
if(mMaterialInfo[patch.startIndex].mMaterialIndex0 == mMaterialInfo[m].mMaterialIndex0 && mMaterialInfo[patch.startIndex].mMaterialIndex1 == mMaterialInfo[m].mMaterialIndex1 &&
patch.rootNormal.dot(mOriginalContacts[m].normal) >= PXS_NORMAL_TOLERANCE)
{
index = PxI32(b-1);
break;
}
}
if(index != numPatches - 1)
{
mIntermediatePatches[numPatches-1].stride = PxU16(m - mIntermediatePatches[numPatches - 1].startIndex);
//Create a new patch...
if(numPatches == CONTACT_REDUCTION_MAX_PATCHES)
{
break;
}
mIntermediatePatches[numPatches].startIndex = m;
mIntermediatePatches[numPatches].mNextPatch = NULL;
if(index == -1)
{
mIntermediatePatches[numPatches].rootIndex = numPatches;
mIntermediatePatches[numPatches].rootNormal = mOriginalContacts[m].normal;
mIntermediatePatches[numPatches].maxPenetration = mOriginalContacts[m].separation;
mIntermediatePatches[numPatches].index = numPatches;
++numUniquePatches;
}
else
{
//Find last element in the link
PxU16 rootIndex = mIntermediatePatches[index].rootIndex;
mIntermediatePatches[index].mNextPatch = &mIntermediatePatches[numPatches];
mIntermediatePatches[numPatches].rootNormal = mIntermediatePatches[index].rootNormal;
mIntermediatePatches[rootIndex].maxPenetration = mIntermediatePatches[numPatches].maxPenetration = PxMin(mIntermediatePatches[rootIndex].maxPenetration, mOriginalContacts[m].separation);
mIntermediatePatches[numPatches].rootIndex = rootIndex;
mIntermediatePatches[numPatches].index = numPatches;
}
++numPatches;
}
}
mIntermediatePatches[numPatches-1].stride = PxU16(m - mIntermediatePatches[numPatches-1].startIndex);
//OK, we have a list of contact patches so that we can start contact reduction per-patch
//OK, now we can go and reduce the contacts on a per-patch basis...
for(PxU32 a = 0; a < numPatches; ++a)
{
mIntermediatePatchesPtrs[a] = &mIntermediatePatches[a];
}
SortBoundsPredicateManifold predicate;
Ps::sort(mIntermediatePatchesPtrs, numPatches, predicate);
PxU32 numReducedPatches = 0;
for(PxU32 a = 0; a < numPatches; ++a)
{
if(mIntermediatePatchesPtrs[a]->rootIndex == mIntermediatePatchesPtrs[a]->index)
{
//Reduce this patch...
if(numReducedPatches == MaxPatches)
break;
ReducedContactPatch& reducedPatch = mPatches[numReducedPatches++];
//OK, now we need to work out if we have to reduce patches...
PxU32 contactCount = 0;
{
ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
while(tmpPatch)
{
contactCount += tmpPatch->stride;
tmpPatch = tmpPatch->mNextPatch;
}
}
if(contactCount <= CONTACT_REDUCTION_MAX_CONTACTS)
{
//Just add the contacts...
ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
PxU32 ind = 0;
while(tmpPatch)
{
for(PxU32 b = 0; b < tmpPatch->stride; ++b)
{
reducedPatch.contactPoints[ind++] = tmpPatch->startIndex + b;
}
tmpPatch = tmpPatch->mNextPatch;
}
reducedPatch.numContactPoints = contactCount;
}
else
{
//Iterate through and find the most extreme point
PxU32 ind = 0;
{
PxReal dist = 0.f;
ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
while(tmpPatch)
{
for(PxU32 b = 0; b < tmpPatch->stride; ++b)
{
PxReal magSq = mOriginalContacts[tmpPatch->startIndex + b].point.magnitudeSquared();
if(dist < magSq)
{
ind = tmpPatch->startIndex + b;
dist = magSq;
}
}
tmpPatch = tmpPatch->mNextPatch;
}
}
reducedPatch.contactPoints[0] = ind;
const PxVec3 p0 = mOriginalContacts[ind].point;
//Now find the point farthest from this point...
{
PxReal maxDist = 0.f;
ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
while(tmpPatch)
{
for(PxU32 b = 0; b < tmpPatch->stride; ++b)
{
PxReal magSq = (p0 - mOriginalContacts[tmpPatch->startIndex + b].point).magnitudeSquared();
if(magSq > maxDist)
{
ind = tmpPatch->startIndex + b;
maxDist = magSq;
}
}
tmpPatch = tmpPatch->mNextPatch;
}
}
reducedPatch.contactPoints[1] = ind;
const PxVec3 p1 = mOriginalContacts[ind].point;
//Now find the point farthest from the segment
PxVec3 n = (p0 - p1).cross(mIntermediatePatchesPtrs[a]->rootNormal);
//PxReal tVal = 0.f;
{
PxReal maxDist = 0.f;
//PxReal tmpTVal;
ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
while(tmpPatch)
{
for(PxU32 b = 0; b < tmpPatch->stride; ++b)
{
//PxReal magSq = tmpDistancePointSegmentSquared(p0, p1, mOriginalContacts[tmpPatch->startIndex + b].point, tmpTVal);
PxReal magSq = (mOriginalContacts[tmpPatch->startIndex + b].point - p0).dot(n);
if(magSq > maxDist)
{
ind = tmpPatch->startIndex + b;
//tVal = tmpTVal;
maxDist = magSq;
}
}
tmpPatch = tmpPatch->mNextPatch;
}
}
reducedPatch.contactPoints[2] = ind;
//const PxVec3 closest = (p0 + (p1 - p0) * tVal);
const PxVec3 dir = -n;//closest - p3;
{
PxReal maxDist = 0.f;
//PxReal tVal = 0.f;
ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
while(tmpPatch)
{
for(PxU32 b = 0; b < tmpPatch->stride; ++b)
{
PxReal magSq = (mOriginalContacts[tmpPatch->startIndex + b].point - p0).dot(dir);
if(magSq > maxDist)
{
ind = tmpPatch->startIndex + b;
maxDist = magSq;
}
}
tmpPatch = tmpPatch->mNextPatch;
}
}
reducedPatch.contactPoints[3] = ind;
//Now, we iterate through all the points, and cluster the points. From this, we establish the deepest point that's within a
//tolerance of this point and keep that point
PxReal separation[CONTACT_REDUCTION_MAX_CONTACTS];
PxU32 deepestInd[CONTACT_REDUCTION_MAX_CONTACTS];
for(PxU32 i = 0; i < 4; ++i)
{
PxU32 index = reducedPatch.contactPoints[i];
separation[i] = mOriginalContacts[index].separation - PXS_SEPARATION_TOLERANCE;
deepestInd[i] = index;
}
ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
while(tmpPatch)
{
for(PxU32 b = 0; b < tmpPatch->stride; ++b)
{
Gu::ContactPoint& point = mOriginalContacts[tmpPatch->startIndex + b];
PxReal distance = PX_MAX_REAL;
PxU32 index = 0;
for(PxU32 c = 0; c < 4; ++c)
{
PxVec3 dif = mOriginalContacts[reducedPatch.contactPoints[c]].point - point.point;
PxReal d = dif.magnitudeSquared();
if(distance > d)
{
distance = d;
index = c;
}
}
if(separation[index] > point.separation)
{
deepestInd[index] = tmpPatch->startIndex+b;
separation[index] = point.separation;
}
}
tmpPatch = tmpPatch->mNextPatch;
}
bool chosen[64];
PxMemZero(chosen, sizeof(chosen));
for(PxU32 i = 0; i < 4; ++i)
{
reducedPatch.contactPoints[i] = deepestInd[i];
chosen[deepestInd[i]] = true;
}
for(PxU32 i = 4; i < CONTACT_REDUCTION_MAX_CONTACTS; ++i)
{
separation[i] = PX_MAX_REAL;
deepestInd[i] = 0;
}
tmpPatch = mIntermediatePatchesPtrs[a];
while(tmpPatch)
{
for(PxU32 b = 0; b < tmpPatch->stride; ++b)
{
if(!chosen[tmpPatch->startIndex+b])
{
Gu::ContactPoint& point = mOriginalContacts[tmpPatch->startIndex + b];
for(PxU32 j = 4; j < CONTACT_REDUCTION_MAX_CONTACTS; ++j)
{
if(point.separation < separation[j])
{
for(PxU32 k = CONTACT_REDUCTION_MAX_CONTACTS-1; k > j; --k)
{
separation[k] = separation[k-1];
deepestInd[k] = deepestInd[k-1];
}
separation[j] = point.separation;
deepestInd[j] = tmpPatch->startIndex+b;
break;
}
}
}
}
tmpPatch = tmpPatch->mNextPatch;
}
for(PxU32 i = 4; i < CONTACT_REDUCTION_MAX_CONTACTS; ++i)
{
reducedPatch.contactPoints[i] = deepestInd[i];
}
reducedPatch.numContactPoints = CONTACT_REDUCTION_MAX_CONTACTS;
}
}
}
mNumPatches = numReducedPatches;
}
};
}
}
#endif //DY_CONTACT_REDUCTION_H

View File

@ -0,0 +1,108 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_CORRELATIONBUFFER_H
#define DY_CORRELATIONBUFFER_H
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "foundation/PxTransform.h"
#include "foundation/PxBounds3.h"
#include "geomutils/GuContactBuffer.h"
#include "PxvConfig.h"
#include "DyFrictionPatch.h"
namespace physx
{
struct PxcNpWorkUnit;
struct PxsMaterialInfo;
namespace Dy
{
struct CorrelationBuffer
{
static const PxU32 MAX_FRICTION_PATCHES = 32;
static const PxU16 LIST_END = 0xffff;
struct ContactPatchData
{
PxU16 start;
PxU16 next;
PxU8 flags;
PxU8 count;
PxReal staticFriction, dynamicFriction, restitution;
PxBounds3 patchBounds;
};
// we can have as many contact patches as contacts, unfortunately
ContactPatchData contactPatches[Gu::ContactBuffer::MAX_CONTACTS];
FrictionPatch PX_ALIGN(16, frictionPatches[MAX_FRICTION_PATCHES]);
PxVec3 PX_ALIGN(16, frictionPatchWorldNormal[MAX_FRICTION_PATCHES]);
PxBounds3 patchBounds[MAX_FRICTION_PATCHES];
PxU32 frictionPatchContactCounts[MAX_FRICTION_PATCHES];
PxU32 correlationListHeads[MAX_FRICTION_PATCHES+1];
// contact IDs are only used to identify auxiliary contact data when velocity
// targets have been set.
PxU16 contactID[MAX_FRICTION_PATCHES][2];
PxU32 contactPatchCount, frictionPatchCount;
};
bool createContactPatches(CorrelationBuffer& fb, const Gu::ContactPoint* cb, PxU32 contactCount, PxReal normalTolerance);
bool correlatePatches(CorrelationBuffer& fb,
const Gu::ContactPoint* cb,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxReal normalTolerance,
PxU32 startContactPatchIndex,
PxU32 startFrictionPatchIndex);
void growPatches(CorrelationBuffer& fb,
const Gu::ContactPoint* buffer,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxReal normalTolerance,
PxU32 frictionPatchStartIndex,
PxReal frictionOffsetThreshold);
}
}
#endif //DY_CORRELATIONBUFFER_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,490 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_DYNAMICS_H
#define DY_DYNAMICS_H
#include "PxvConfig.h"
#include "CmSpatialVector.h"
#include "CmTask.h"
#include "CmPool.h"
#include "PxcThreadCoherentCache.h"
#include "DyThreadContext.h"
#include "PxcConstraintBlockStream.h"
#include "DySolverBody.h"
#include "DyContext.h"
#include "PxsIslandManagerTypes.h"
#include "PxvNphaseImplementationContext.h"
#include "solver/PxSolverDefs.h"
namespace physx
{
namespace Cm
{
class FlushPool;
}
namespace IG
{
class SimpleIslandManager;
struct Edge;
}
class PxsRigidBody;
class PxsStreamedThresholdTable;
struct PxsBodyCore;
struct PxsIslandObjects;
class PxsIslandIndices;
struct PxsIndexedInteraction;
class PxsIslandManager;
struct PxsIndexedConstraint;
struct PxsIndexedContactManager;
class PxsHeapMemoryAllocator;
class PxsMemoryManager;
class PxsDefaultMemoryManager;
struct PxSolverConstraintDesc;
namespace Cm
{
class Bitmap;
class SpatialVector;
}
namespace Dy
{
class SolverCore;
struct SolverIslandParams;
struct ArticulationSolverDesc;
class Articulation;
class DynamicsContext;
#define SOLVER_PARALLEL_METHOD_ARGS \
DynamicsContext& context, \
SolverIslandParams& params, \
IG::IslandSim& islandSim
//typedef void (*PxsSolveParallelMethod)(SOLVER_PARALLEL_METHOD_ARGS);
//extern PxsSolveParallelMethod solveParallel[3];
void solveParallel(SOLVER_PARALLEL_METHOD_ARGS);
void solveParallelCouloumFriction(SOLVER_PARALLEL_METHOD_ARGS);
struct SolverIslandObjects;
/**
\brief Solver body pool (array) that enforces 128-byte alignment for base address of array.
\note This reduces cache misses on platforms with 128-byte-size cache lines by aligning the start of the array to the beginning of a cache line.
*/
class SolverBodyPool : public Ps::Array<PxSolverBody, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverBody> > >
{
PX_NOCOPY(SolverBodyPool)
public:
SolverBodyPool() {}
};
/**
\brief Solver body data pool (array) that enforces 128-byte alignment for base address of array.
\note This reduces cache misses on platforms with 128-byte-size cache lines by aligning the start of the array to the beginning of a cache line.
*/
class SolverBodyDataPool : public Ps::Array<PxSolverBodyData, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverBodyData> > >
{
PX_NOCOPY(SolverBodyDataPool)
public:
SolverBodyDataPool() {}
};
class SolverConstraintDescPool : public Ps::Array<PxSolverConstraintDesc, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverConstraintDesc> > >
{
PX_NOCOPY(SolverConstraintDescPool)
public:
SolverConstraintDescPool() { }
};
/**
\brief Encapsulates an island's context
*/
struct IslandContext
{
//The thread context for this island (set in in the island start task, released in the island end task)
ThreadContext* mThreadContext;
PxsIslandIndices mCounts;
};
/**
\brief Encapsules the data used by the constraint solver.
*/
#if PX_VC
#pragma warning(push)
#pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value.
#endif
class DynamicsContext : public Context
{
PX_NOCOPY(DynamicsContext)
public:
/**
\brief Creates a DynamicsContext associated with a PxsContext
\return A pointer to the newly-created DynamicsContext.
*/
static DynamicsContext* create( PxcNpMemBlockPool* memBlockPool,
PxcScratchAllocator& scratchAllocator,
Cm::FlushPool& taskPool,
PxvSimStats& simStats,
PxTaskManager* taskManager,
Ps::VirtualAllocatorCallback* allocator,
PxsMaterialManager* materialManager,
IG::IslandSim* accurateIslandSim,
PxU64 contextID,
const bool enableStabilization,
const bool useEnhancedDeterminism,
const bool useAdaptiveForce,
const PxReal maxBiasCoefficient,
const bool frictionEveryIteration
);
/**
\brief Destroys this DynamicsContext
*/
void destroy();
/**
\brief Returns the static world solver body
\return The static world solver body.
*/
PX_FORCE_INLINE PxSolverBody& getWorldSolverBody() { return mWorldSolverBody; }
PX_FORCE_INLINE Cm::FlushPool& getTaskPool() { return mTaskPool; }
PX_FORCE_INLINE ThresholdStream& getThresholdStream() { return *mThresholdStream; }
PX_FORCE_INLINE PxvSimStats& getSimStats() { return mSimStats; }
#if PX_ENABLE_SIM_STATS
void addThreadStats(const ThreadContext::ThreadSimStats& stats);
#endif
/**
\brief The entry point for the constraint solver.
\param[in] dt The simulation time-step
\param[in] continuation The continuation task for the solver
This method is called after the island generation has completed. Its main responsibilities are:
(1) Reserving the solver body pools
(2) Initializing the static and kinematic solver bodies, which are shared resources between islands.
(3) Construct the solver task chains for each island
Each island is solved as an independent solver task chain in parallel.
*/
virtual void update(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* continuation, PxBaseTask* lostTouchTask,
PxsContactManager** foundPatchManagers, PxU32 nbFoundPatchManagers, PxsContactManager** lostPatchManagers, PxU32 nbLostPatchManagers,
PxU32 maxPatchesPerCM, PxsContactManagerOutputIterator& iter, PxsContactManagerOutput* gpuOutputs, const PxReal dt, const PxVec3& gravity, const PxU32 bitMapWordCounts);
void updatePostKinematic(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* continuation, PxBaseTask* lostTouchTask);
virtual void processLostPatches(IG::SimpleIslandManager& /*simpleIslandManager*/, PxsContactManager** /*lostPatchManagers*/, PxU32 /*nbLostPatchManagers*/, PxsContactManagerOutputIterator& /*iterator*/){}
virtual void updateBodyCore(PxBaseTask* continuation);
virtual void setSimulationController(PxsSimulationController* simulationController ){ mSimulationController = simulationController; }
/**
\brief This method combines the results of several islands, e.g. constructing scene-level simulation statistics and merging together threshold streams for contact notification.
*/
virtual void mergeResults();
virtual void getDataStreamBase(void*& /*contactStreamBase*/, void*& /*patchStreamBase*/, void*& /*forceAndIndicesStreamBase*/){}
/**
\brief Allocates and returns a thread context object.
\return A thread context.
*/
PX_FORCE_INLINE ThreadContext* getThreadContext()
{
return mThreadContextPool.get();
}
/**
\brief Returns a thread context to the thread context pool.
\param[in] context The thread context to return to the thread context pool.
*/
void putThreadContext(ThreadContext* context)
{
mThreadContextPool.put(context);
}
PX_FORCE_INLINE PxU32 getKinematicCount() const { return mKinematicCount; }
PX_FORCE_INLINE PxU64 getContextId() const { return mContextID; }
protected:
/**
\brief Constructor for DynamicsContext
*/
DynamicsContext(PxcNpMemBlockPool* memBlockPool,
PxcScratchAllocator& scratchAllocator,
Cm::FlushPool& taskPool,
PxvSimStats& simStats,
PxTaskManager* taskManager,
Ps::VirtualAllocatorCallback* allocator,
PxsMaterialManager* materialManager,
IG::IslandSim* accurateIslandSim,
PxU64 contextID,
const bool enableStabilization,
const bool useEnhancedDeterminism,
const bool useAdaptiveForce,
const PxReal maxBiasCoefficient,
const bool frictionEveryIteration
);
/**
\brief Destructor for DynamicsContext
*/
virtual ~DynamicsContext();
// Solver helper-methods
/**
\brief Computes the unconstrained velocity for a given PxsRigidBody
\param[in] atom The PxsRigidBody
*/
void computeUnconstrainedVelocity(PxsRigidBody* atom) const;
/**
\brief fills in a PxSolverConstraintDesc from an indexed interaction
\param[in,out] desc The PxSolverConstraintDesc
\param[in] constraint The PxsIndexedInteraction
*/
void setDescFromIndices(PxSolverConstraintDesc& desc,
const PxsIndexedInteraction& constraint, const PxU32 solverBodyOffset);
void setDescFromIndices(PxSolverConstraintDesc& desc, IG::EdgeIndex edgeIndex,
const IG::SimpleIslandManager& islandManager, PxU32* bodyRemapTable, const PxU32 solverBodyOffset);
/**
\brief Compute the unconstrained velocity for set of bodies in parallel. This function may spawn additional tasks.
\param[in] dt The timestep
\param[in] bodyArray The array of body cores
\param[in] originalBodyArray The array of PxsRigidBody
\param[in] nodeIndexArray The array of island node index
\param[in] bodyCount The number of bodies
\param[out] solverBodyPool The pool of solver bodies. These are synced with the corresponding body in bodyArray.
\param[out] solverBodyDataPool The pool of solver body data. These are synced with the corresponding body in bodyArray
\param[out] motionVelocityArray The motion velocities for the bodies
\param[out] maxSolverPositionIterations The maximum number of position iterations requested by any body in the island
\param[out] maxSolverVelocityIterations The maximum number of velocity iterations requested by any body in the island
\param[out] integrateTask The continuation task for any tasks spawned by this function.
*/
void preIntegrationParallel(
const PxF32 dt,
PxsBodyCore*const* bodyArray, // INOUT: core body attributes
PxsRigidBody*const* originalBodyArray, // IN: original body atom names (LEGACY - DON'T deref the ptrs!!)
PxU32 const* nodeIndexArray, // IN: island node index
PxU32 bodyCount, // IN: body count
PxSolverBody* solverBodyPool, // IN: solver atom pool (space preallocated)
PxSolverBodyData* solverBodyDataPool,
Cm::SpatialVector* motionVelocityArray, // OUT: motion velocities
PxU32& maxSolverPositionIterations,
PxU32& maxSolverVelocityIterations,
PxBaseTask& integrateTask
);
/**
\brief Solves an island in parallel.
\param[in] params Solver parameter structure
*/
void solveParallel(SolverIslandParams& params, IG::IslandSim& islandSim, Cm::SpatialVectorF* Z, Cm::SpatialVectorF* deltaV);
void integrateCoreParallel(SolverIslandParams& params, IG::IslandSim& islandSim);
/**
\brief Resets the thread contexts
*/
void resetThreadContexts();
/**
\brief Returns the scratch memory allocator.
\return The scratch memory allocator.
*/
PX_FORCE_INLINE PxcScratchAllocator& getScratchAllocator() { return mScratchAllocator; }
//Data
/**
\brief Body to represent the world static body.
*/
PX_ALIGN(16, PxSolverBody mWorldSolverBody);
/**
\brief Body data to represent the world static body.
*/
PX_ALIGN(16, PxSolverBodyData mWorldSolverBodyData);
/**
\brief A thread context pool
*/
PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool> mThreadContextPool;
/**
\brief Solver constraint desc array
*/
SolverConstraintDescPool mSolverConstraintDescPool;
/**
\brief Ordered sover constraint desc array (after partitioning)
*/
SolverConstraintDescPool mOrderedSolverConstraintDescPool;
/**
\brief A temporary array of constraint descs used for partitioning
*/
SolverConstraintDescPool mTempSolverConstraintDescPool;
/**
\brief An array of contact constraint batch headers
*/
Ps::Array<PxConstraintBatchHeader> mContactConstraintBatchHeaders;
/**
\brief Array of motion velocities for all bodies in the scene.
*/
Ps::Array<Cm::SpatialVector> mMotionVelocityArray;
/**
\brief Array of body core pointers for all bodies in the scene.
*/
Ps::Array<PxsBodyCore*> mBodyCoreArray;
/**
\brief Array of rigid body pointers for all bodies in the scene.
*/
Ps::Array<PxsRigidBody*> mRigidBodyArray;
/**
\brief Array of articulationpointers for all articulations in the scene.
*/
Ps::Array<ArticulationV*> mArticulationArray;
/**
\brief Global pool for solver bodies. Kinematic bodies are at the start, and then dynamic bodies
*/
SolverBodyPool mSolverBodyPool;
/**
\brief Global pool for solver body data. Kinematic bodies are at the start, and then dynamic bodies
*/
SolverBodyDataPool mSolverBodyDataPool;
ThresholdStream* mExceededForceThresholdStream[2]; //this store previous and current exceeded force thresholdStream
Ps::Array<PxU32> mExceededForceThresholdStreamMask;
/**
\brief Interface to the solver core.
\note We currently only support PxsSolverCoreSIMD. Other cores may be added in future releases.
*/
SolverCore* mSolverCore[PxFrictionType::eFRICTION_COUNT];
Ps::Array<PxU32> mSolverBodyRemapTable; //Remaps from the "active island" index to the index within a solver island
Ps::Array<PxU32> mNodeIndexArray; //island node index
Ps::Array<PxsIndexedContactManager> mContactList;
/**
\brief The total number of kinematic bodies in the scene
*/
PxU32 mKinematicCount;
/**
\brief Atomic counter for the number of threshold stream elements.
*/
PxI32 mThresholdStreamOut;
PxsMaterialManager* mMaterialManager;
PxsContactManagerOutputIterator mOutputIterator;
private:
//private:
PxcScratchAllocator& mScratchAllocator;
Cm::FlushPool& mTaskPool;
PxTaskManager* mTaskManager;
PxU32 mCurrentIndex; // this is the index point to the current exceeded force threshold stream
PxU64 mContextID;
protected:
friend class PxsSolverStartTask;
friend class PxsSolverAticulationsTask;
friend class PxsSolverSetupConstraintsTask;
friend class PxsSolverCreateFinalizeConstraintsTask;
friend class PxsSolverConstraintPartitionTask;
friend class PxsSolverSetupSolveTask;
friend class PxsSolverIntegrateTask;
friend class PxsSolverEndTask;
friend class PxsSolverConstraintPostProcessTask;
friend class PxsForceThresholdTask;
friend class SolverArticulationUpdateTask;
friend void solveParallel(SOLVER_PARALLEL_METHOD_ARGS);
};
#if PX_VC
#pragma warning(pop)
#endif
}
}
#endif //DY_DYNAMICS_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,69 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXD_FEATHERSTONE_ARTICULATION_LINK_H
#define PXD_FEATHERSTONE_ARTICULATION_LINK_H
#include "foundation/PxVec3.h"
#include "foundation/PxQuat.h"
#include "foundation/PxTransform.h"
#include "PsVecMath.h"
#include "CmUtils.h"
#include "CmSpatialVector.h"
#include "DyVArticulation.h"
#include "DyFeatherstoneArticulationUtils.h"
namespace physx
{
namespace Dy
{
class ArticulationLinkData
{
const static PxU32 MaxJointRows = 3;
public:
ArticulationLinkData()
{
maxPenBias = 0.f;
}
Cm::SpatialVectorF IsW[MaxJointRows];//stI is the transpose of Is
PxVec3 childToBase;
PxVec3 r; //vector from parent com to child com
PxVec3 rw; //vector from parent com to child com
PxReal qstZIc[MaxJointRows];//jointForce - stZIc
PxReal maxPenBias;
};
}//namespace Dy
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,298 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "PxvConfig.h"
#include "DyCorrelationBuffer.h"
#include "PxsMaterialManager.h"
#include "PsUtilities.h"
#include "foundation/PxBounds3.h"
using namespace physx;
using namespace Gu;
namespace physx
{
namespace Dy
{
namespace
{
PX_FORCE_INLINE void initContactPatch(CorrelationBuffer::ContactPatchData& patch, PxU16 index, PxReal restitution, PxReal staticFriction, PxReal dynamicFriction,
PxU8 flags)
{
patch.start = index;
patch.count = 1;
patch.next = 0;
patch.flags = flags;
patch.restitution = restitution;
patch.staticFriction = staticFriction;
patch.dynamicFriction = dynamicFriction;
}
PX_FORCE_INLINE void initFrictionPatch(FrictionPatch& p, const PxVec3& worldNormal, const PxTransform& body0Pose, const PxTransform& body1Pose,
PxReal restitution, PxReal staticFriction, PxReal dynamicFriction, PxU8 materialFlags)
{
p.body0Normal = body0Pose.rotateInv(worldNormal);
p.body1Normal = body1Pose.rotateInv(worldNormal);
p.relativeQuat = body0Pose.q.getConjugate() * body1Pose.q;
p.anchorCount = 0;
p.broken = 0;
p.staticFriction = staticFriction;
p.dynamicFriction = dynamicFriction;
p.restitution = restitution;
p.materialFlags = materialFlags;
}
}
bool createContactPatches(CorrelationBuffer& fb, const Gu::ContactPoint* cb, PxU32 contactCount, PxReal normalTolerance)
{
// PT: this rewritten version below doesn't have LHS
PxU32 contactPatchCount = fb.contactPatchCount;
if(contactPatchCount == Gu::ContactBuffer::MAX_CONTACTS)
return false;
if(contactCount>0)
{
CorrelationBuffer::ContactPatchData* currentPatchData = fb.contactPatches + contactPatchCount;
const Gu::ContactPoint* PX_RESTRICT contacts = cb;
PxU8 count=1;
initContactPatch(fb.contactPatches[contactPatchCount++], Ps::to16(0), contacts[0].restitution,
contacts[0].staticFriction, contacts[0].dynamicFriction, PxU8(contacts[0].materialFlags));
PxBounds3 bounds(contacts[0].point, contacts[0].point);
PxU32 patchIndex = 0;
for (PxU32 i = 1; i<contactCount; i++)
{
const Gu::ContactPoint& curContact = contacts[i];
const Gu::ContactPoint& preContact = contacts[patchIndex];
if(curContact.staticFriction == preContact.staticFriction
&& curContact.dynamicFriction == preContact.dynamicFriction
&& curContact.restitution == preContact.restitution
&& curContact.normal.dot(preContact.normal)>=normalTolerance)
{
bounds.include(curContact.point);
count++;
}
else
{
if(contactPatchCount == Gu::ContactBuffer::MAX_CONTACTS)
return false;
patchIndex = i;
currentPatchData->count = count;
count = 1;
currentPatchData->patchBounds = bounds;
currentPatchData = fb.contactPatches + contactPatchCount;
initContactPatch(fb.contactPatches[contactPatchCount++], Ps::to16(i), curContact.restitution,
curContact.staticFriction, curContact.dynamicFriction, PxU8(curContact.materialFlags));
bounds = PxBounds3(curContact.point, curContact.point);
}
}
if(count!=1)
currentPatchData->count = count;
currentPatchData->patchBounds = bounds;
}
fb.contactPatchCount = contactPatchCount;
return true;
}
bool correlatePatches(CorrelationBuffer& fb,
const Gu::ContactPoint* cb,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxReal normalTolerance,
PxU32 startContactPatchIndex,
PxU32 startFrictionPatchIndex)
{
bool overflow = false;
PxU32 frictionPatchCount = fb.frictionPatchCount;
for(PxU32 i=startContactPatchIndex;i<fb.contactPatchCount;i++)
{
CorrelationBuffer::ContactPatchData &c = fb.contactPatches[i];
const PxVec3 patchNormal = cb[c.start].normal;
PxU32 j=startFrictionPatchIndex;
for(;j<frictionPatchCount && ((patchNormal.dot(fb.frictionPatchWorldNormal[j]) < normalTolerance)
|| fb.frictionPatches[j].restitution != c.restitution|| fb.frictionPatches[j].staticFriction != c.staticFriction ||
fb.frictionPatches[j].dynamicFriction != c.dynamicFriction);j++)
;
if(j==frictionPatchCount)
{
overflow |= j==CorrelationBuffer::MAX_FRICTION_PATCHES;
if(overflow)
continue;
initFrictionPatch(fb.frictionPatches[frictionPatchCount], patchNormal, bodyFrame0, bodyFrame1, c.restitution, c.staticFriction, c.dynamicFriction, c.flags);
fb.frictionPatchWorldNormal[j] = patchNormal;
fb.frictionPatchContactCounts[frictionPatchCount] = c.count;
fb.patchBounds[frictionPatchCount] = c.patchBounds;
fb.contactID[frictionPatchCount][0] = 0xffff;
fb.contactID[frictionPatchCount++][1] = 0xffff;
c.next = CorrelationBuffer::LIST_END;
}
else
{
fb.patchBounds[j].include(c.patchBounds);
fb.frictionPatchContactCounts[j] += c.count;
c.next = Ps::to16(fb.correlationListHeads[j]);
}
fb.correlationListHeads[j] = i;
}
fb.frictionPatchCount = frictionPatchCount;
return overflow;
}
// run over the friction patches, trying to find two anchors per patch. If we already have
// anchors that are close, we keep them, which gives us persistent spring behavior
void growPatches(CorrelationBuffer& fb,
const ContactPoint* cb,
const PxTransform& bodyFrame0,
const PxTransform& bodyFrame1,
PxReal correlationDistance,
PxU32 frictionPatchStartIndex,
PxReal frictionOffsetThreshold)
{
for(PxU32 i=frictionPatchStartIndex;i<fb.frictionPatchCount;i++)
{
FrictionPatch& fp = fb.frictionPatches[i];
if (fp.anchorCount == 2 || fb.correlationListHeads[i] == CorrelationBuffer::LIST_END)
{
const PxReal frictionPatchDiagonalSq = fb.patchBounds[i].getDimensions().magnitudeSquared();
const PxReal anchorSqDistance = (fp.body0Anchors[0] - fp.body0Anchors[1]).magnitudeSquared();
//If the squared distance between the anchors is more than a quarter of the patch diagonal, we can keep,
//otherwise the anchors are potentially clustered around a corner so force a rebuild of the patch
if (fb.frictionPatchContactCounts[i] == 0 || (anchorSqDistance * 4.f) >= frictionPatchDiagonalSq)
continue;
fp.anchorCount = 0;
}
PxVec3 worldAnchors[2];
PxU16 anchorCount = 0;
PxReal pointDistSq = 0.0f, dist0, dist1;
// if we have an anchor already, keep it
if(fp.anchorCount == 1)
{
worldAnchors[anchorCount++] = bodyFrame0.transform(fp.body0Anchors[0]);
}
for(PxU32 patch = fb.correlationListHeads[i];
patch!=CorrelationBuffer::LIST_END;
patch = fb.contactPatches[patch].next)
{
CorrelationBuffer::ContactPatchData& cp = fb.contactPatches[patch];
for(PxU16 j=0;j<cp.count;j++)
{
const PxVec3& worldPoint = cb[cp.start+j].point;
if(cb[cp.start+j].separation < frictionOffsetThreshold)
{
switch(anchorCount)
{
case 0:
fb.contactID[i][0] = PxU16(cp.start+j);
worldAnchors[0] = worldPoint;
anchorCount++;
break;
case 1:
pointDistSq = (worldPoint-worldAnchors[0]).magnitudeSquared();
if (pointDistSq > (correlationDistance * correlationDistance))
{
fb.contactID[i][1] = PxU16(cp.start+j);
worldAnchors[1] = worldPoint;
anchorCount++;
}
break;
default: //case 2
dist0 = (worldPoint-worldAnchors[0]).magnitudeSquared();
dist1 = (worldPoint-worldAnchors[1]).magnitudeSquared();
if (dist0 > dist1)
{
if(dist0 > pointDistSq)
{
fb.contactID[i][1] = PxU16(cp.start+j);
worldAnchors[1] = worldPoint;
pointDistSq = dist0;
}
}
else if (dist1 > pointDistSq)
{
fb.contactID[i][0] = PxU16(cp.start+j);
worldAnchors[0] = worldPoint;
pointDistSq = dist1;
}
}
}
}
}
//PX_ASSERT(anchorCount > 0);
// add the new anchor(s) to the patch
for(PxU32 j = fp.anchorCount; j < anchorCount; j++)
{
fp.body0Anchors[j] = bodyFrame0.transformInv(worldAnchors[j]);
fp.body1Anchors[j] = bodyFrame1.transformInv(worldAnchors[j]);
}
// the block contact solver always reads at least one anchor per patch for performance reasons even if there are no valid patches,
// so we need to initialize this in the unexpected case that we have no anchors
if(anchorCount==0)
fp.body0Anchors[0] = fp.body1Anchors[0] = PxVec3(0);
fp.anchorCount = anchorCount;
}
}
}
}

View File

@ -0,0 +1,83 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXC_FRICTIONPATCH_H
#define PXC_FRICTIONPATCH_H
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
namespace physx
{
namespace Dy
{
struct FrictionPatch
{
PxU8 broken; // PT: must be first byte of struct, see "frictionBrokenWritebackByte"
PxU8 materialFlags;
PxU16 anchorCount;
PxReal restitution;
PxReal staticFriction;
PxReal dynamicFriction;
PxVec3 body0Normal;
PxVec3 body1Normal;
PxVec3 body0Anchors[2];
PxVec3 body1Anchors[2];
PxQuat relativeQuat;
PX_FORCE_INLINE void operator = (const FrictionPatch& other)
{
broken = other.broken;
materialFlags = other.materialFlags;
anchorCount = other.anchorCount;
body0Normal = other.body0Normal;
body1Normal = other.body1Normal;
body0Anchors[0] = other.body0Anchors[0];
body0Anchors[1] = other.body0Anchors[1];
body1Anchors[0] = other.body1Anchors[0];
body1Anchors[1] = other.body1Anchors[1];
relativeQuat = other.relativeQuat;
restitution = other.restitution;
staticFriction = other.staticFriction;
dynamicFriction = other.dynamicFriction;
}
};
//PX_COMPILE_TIME_ASSERT(sizeof(FrictionPatch)==80);
}
}
#endif

View File

@ -0,0 +1,128 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXC_FRICTIONPATCHPOOL_H
#define PXC_FRICTIONPATCHPOOL_H
#include "foundation/PxSimpleTypes.h"
#include "PxvConfig.h"
#include "PsMutex.h"
#include "PsArray.h"
// Each narrow phase thread has an input stream of friction patches from the
// previous frame and an output stream of friction patches which will be
// saved for next frame. The patches persist for exactly one frame at which
// point they get thrown away.
// There is a stream pair per thread. A contact callback reserves space
// for its friction patches and gets a cookie in return that can stash
// for next frame. Cookies are valid for one frame only.
//
// note that all friction patches reserved are guaranteed to be contiguous;
// this might turn out to be a bit inefficient if we often have a large
// number of friction patches
#include "PxcNpMemBlockPool.h"
namespace physx
{
class FrictionPatchStreamPair
{
public:
FrictionPatchStreamPair(PxcNpMemBlockPool& blockPool);
// reserve can fail and return null. Read should never fail
template<class FrictionPatch>
FrictionPatch* reserve(const PxU32 size);
template<class FrictionPatch>
const FrictionPatch* findInputPatches(const PxU8* ptr) const;
void reset();
PxcNpMemBlockPool& getBlockPool() { return mBlockPool;}
private:
PxcNpMemBlockPool& mBlockPool;
PxcNpMemBlock* mBlock;
PxU32 mUsed;
FrictionPatchStreamPair& operator=(const FrictionPatchStreamPair&);
};
PX_FORCE_INLINE FrictionPatchStreamPair::FrictionPatchStreamPair(PxcNpMemBlockPool& blockPool):
mBlockPool(blockPool), mBlock(NULL), mUsed(0)
{
}
PX_FORCE_INLINE void FrictionPatchStreamPair::reset()
{
mBlock = NULL;
mUsed = 0;
}
// reserve can fail and return null. Read should never fail
template <class FrictionPatch>
FrictionPatch* FrictionPatchStreamPair::reserve(const PxU32 size)
{
if(size>PxcNpMemBlock::SIZE)
{
return reinterpret_cast<FrictionPatch*>(-1);
}
PX_ASSERT(size <= PxcNpMemBlock::SIZE);
FrictionPatch* ptr = NULL;
if(mBlock == NULL || mUsed + size > PxcNpMemBlock::SIZE)
{
mBlock = mBlockPool.acquireFrictionBlock();
mUsed = 0;
}
if(mBlock)
{
ptr = reinterpret_cast<FrictionPatch*>(mBlock->data+mUsed);
mUsed += size;
}
return ptr;
}
template <class FrictionPatch>
const FrictionPatch* FrictionPatchStreamPair::findInputPatches(const PxU8* ptr) const
{
return reinterpret_cast<const FrictionPatch*>(ptr);
}
}
#endif

View File

@ -0,0 +1,94 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "CmUtils.h"
#include "DySolverBody.h"
#include "PxsRigidBody.h"
#include "PxvDynamics.h"
using namespace physx;
// PT: TODO: SIMDify all this...
void Dy::copyToSolverBodyData(const PxVec3& linearVelocity, const PxVec3& angularVelocity, const PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose,
const PxReal maxDepenetrationVelocity, const PxReal maxContactImpulse, const PxU32 nodeIndex, const PxReal reportThreshold, PxSolverBodyData& data, PxU32 lockFlags)
{
data.nodeIndex = nodeIndex;
const PxVec3 safeSqrtInvInertia = computeSafeSqrtInertia(invInertia);
// PT: TODO: re-SIMDify this one
const PxMat33 rotation(globalPose.q);
Cm::transformInertiaTensor(safeSqrtInvInertia, rotation, data.sqrtInvInertia);
// Copy simple properties
data.linearVelocity = linearVelocity;
data.angularVelocity = angularVelocity;
if (lockFlags)
{
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_X)
data.linearVelocity.x = 0.f;
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Y)
data.linearVelocity.y = 0.f;
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Z)
data.linearVelocity.z = 0.f;
//KS - technically, we can zero the inertia columns and produce stiffer constraints. However, this can cause numerical issues with the
//joint solver, which is fixed by disabling joint preprocessing and setting minResponseThreshold to some reasonable value > 0. However, until
//this is handled automatically, it's probably better not to zero these inertia rows
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_X)
{
data.angularVelocity.x = 0.f;
//data.sqrtInvInertia.column0 = PxVec3(0.f);
}
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Y)
{
data.angularVelocity.y = 0.f;
//data.sqrtInvInertia.column1 = PxVec3(0.f);
}
if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Z)
{
data.angularVelocity.z = 0.f;
//data.sqrtInvInertia.column2 = PxVec3(0.f);
}
}
PX_ASSERT(linearVelocity.isFinite());
PX_ASSERT(angularVelocity.isFinite());
data.invMass = invMass;
data.penBiasClamp = maxDepenetrationVelocity;
data.maxContactImpulse = maxContactImpulse;
data.body2World = globalPose;
data.lockFlags = PxU16(lockFlags);
data.reportThreshold = reportThreshold;
}

View File

@ -0,0 +1,72 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERBODY_H
#define DY_SOLVERBODY_H
#include "foundation/PxVec3.h"
#include "foundation/PxTransform.h"
#include "foundation/PxMat33.h"
#include "CmPhysXCommon.h"
#include "CmSpatialVector.h"
#include "solver/PxSolverDefs.h"
namespace physx
{
class PxsRigidBody;
struct PxsBodyCore;
namespace Dy
{
// PT: TODO: make sure this is still needed / replace with V4sqrt
//This method returns values of 0 when the inertia is 0. This is a bit of a hack but allows us to
//represent kinematic objects' velocities in our new format
PX_FORCE_INLINE PxVec3 computeSafeSqrtInertia(const PxVec3& v)
{
return PxVec3( v.x == 0.0f ? 0.0f : PxSqrt(v.x),
v.y == 0.0f ? 0.0f : PxSqrt(v.y),
v.z == 0.0f ? 0.0f : PxSqrt(v.z));
}
void copyToSolverBodyData(const PxVec3& linearVelocity, const PxVec3& angularVelocity, const PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose,
const PxReal maxDepenetrationVelocity, const PxReal maxContactImpulse, const PxU32 nodeIndex, const PxReal reportThreshold, PxSolverBodyData& solverBodyData, PxU32 lockFlags);
// PT: TODO: using PxsBodyCore in the interface makes us write less data to the stack for passing arguments, and we can take advantage of the class layout
// (we know what is aligned or not, we know if it is safe to V4Load vectors, etc). Note that this is what we previously had, which is why PxsBodyCore was still
// forward-referenced above.
//void copyToSolverBodyData(PxSolverBodyData& solverBodyData, const PxsBodyCore& core, const PxU32 nodeIndex);
}
}
#endif //DY_SOLVERBODY_H

View File

@ -0,0 +1,204 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVER_CONSTRAINT_1D_H
#define DY_SOLVER_CONSTRAINT_1D_H
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
#include "DyArticulationUtils.h"
#include "DySolverConstraintTypes.h"
#include "DySolverBody.h"
#include "PxConstraintDesc.h"
#include "DySolverConstraintDesc.h"
namespace physx
{
namespace Dy
{
// dsequeira: we should probably fork these structures for constraints and extended constraints,
// since there's a few things that are used for one but not the other
struct SolverConstraint1DHeader
{
PxU8 type; // enum SolverConstraintType - must be first byte
PxU8 count; // count of following 1D constraints
PxU8 dominance;
PxU8 breakable; // indicate whether this constraint is breakable or not
PxReal linBreakImpulse;
PxReal angBreakImpulse;
PxReal invMass0D0;
PxVec3 body0WorldOffset;
PxReal invMass1D1;
PxReal linearInvMassScale0; // only used by articulations
PxReal angularInvMassScale0; // only used by articulations
PxReal linearInvMassScale1; // only used by articulations
PxReal angularInvMassScale1; // only used by articulations
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DHeader) == 48);
PX_ALIGN_PREFIX(16)
struct SolverConstraint1D
{
public:
PxVec3 lin0; //!< linear velocity projection (body 0)
PxReal constant; //!< constraint constant term
PxVec3 lin1; //!< linear velocity projection (body 1)
PxReal unbiasedConstant; //!< constraint constant term without bias
PxVec3 ang0; //!< angular velocity projection (body 0)
PxReal velMultiplier; //!< constraint velocity multiplier
PxVec3 ang1; //!< angular velocity projection (body 1)
PxReal impulseMultiplier; //!< constraint impulse multiplier
PxVec3 ang0Writeback; //!< unscaled angular velocity projection (body 0)
PxU32 pad;
PxReal minImpulse; //!< Lower bound on impulse magnitude
PxReal maxImpulse; //!< Upper bound on impulse magnitude
PxReal appliedForce; //!< applied force to correct velocity+bias
PxU32 flags;
} PX_ALIGN_SUFFIX(16);
PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1D) == 96);
struct SolverConstraint1DExt : public SolverConstraint1D
{
public:
Cm::SpatialVectorV deltaVA;
Cm::SpatialVectorV deltaVB;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DExt) == 160);
PX_FORCE_INLINE void init(SolverConstraint1DHeader& h,
PxU8 count,
bool isExtended,
const PxConstraintInvMassScale& ims)
{
h.type = PxU8(isExtended ? DY_SC_TYPE_EXT_1D : DY_SC_TYPE_RB_1D);
h.count = count;
h.dominance = 0;
h.linearInvMassScale0 = ims.linear0;
h.angularInvMassScale0 = ims.angular0;
h.linearInvMassScale1 = -ims.linear1;
h.angularInvMassScale1 = -ims.angular1;
}
PX_FORCE_INLINE void init(SolverConstraint1D& c,
const PxVec3& _linear0, const PxVec3& _linear1,
const PxVec3& _angular0, const PxVec3& _angular1,
PxReal _minImpulse, PxReal _maxImpulse)
{
PX_ASSERT(_linear0.isFinite());
PX_ASSERT(_linear1.isFinite());
c.lin0 = _linear0;
c.lin1 = _linear1;
c.ang0 = _angular0;
c.ang1 = _angular1;
c.minImpulse = _minImpulse;
c.maxImpulse = _maxImpulse;
c.flags = 0;
c.appliedForce = 0;
}
PX_FORCE_INLINE bool needsNormalVel(const Px1DConstraint &c)
{
return c.flags & Px1DConstraintFlag::eRESTITUTION
|| (c.flags & Px1DConstraintFlag::eSPRING && c.flags & Px1DConstraintFlag::eACCELERATION_SPRING);
}
PX_FORCE_INLINE void setSolverConstants(PxReal& constant,
PxReal& unbiasedConstant,
PxReal& velMultiplier,
PxReal& impulseMultiplier,
const Px1DConstraint& c,
PxReal normalVel,
PxReal unitResponse,
PxReal minRowResponse,
PxReal erp,
PxReal dt,
PxReal recipdt)
{
PX_ASSERT(PxIsFinite(unitResponse));
PxReal recipResponse = unitResponse <= minRowResponse ? 0 : 1.0f/unitResponse;
PxReal geomError = c.geometricError * erp;
if(c.flags & Px1DConstraintFlag::eSPRING)
{
PxReal a = dt * dt * c.mods.spring.stiffness + dt * c.mods.spring.damping;
PxReal b = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * geomError);
if(c.flags & Px1DConstraintFlag::eACCELERATION_SPRING)
{
PxReal x = 1.0f/(1.0f+a);
constant = unbiasedConstant = x * recipResponse * b;
velMultiplier = -x * recipResponse * a;
impulseMultiplier = 1.0f-x;
}
else
{
PxReal x = unitResponse == 0.f ? 0.f : 1.0f/(1.0f+a*unitResponse);
constant = unbiasedConstant = x * b;
velMultiplier = -x*a;
impulseMultiplier = 1.0f-x;
}
}
else
{
velMultiplier = -recipResponse;
impulseMultiplier = 1.0f;
if(c.flags & Px1DConstraintFlag::eRESTITUTION && -normalVel>c.mods.bounce.velocityThreshold)
{
unbiasedConstant = constant = recipResponse * c.mods.bounce.restitution*-normalVel;
}
else
{
// see usage of 'for internal use' in preprocessRows()
constant = recipResponse * (c.velocityTarget - geomError*recipdt);
unbiasedConstant = recipResponse * (c.velocityTarget - c.forInternalUse*recipdt);
}
}
}
}
}
#endif //DY_SOLVER_CONSTRAINT_1D_H

View File

@ -0,0 +1,106 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONSTRAINT1D4_H
#define DY_SOLVERCONSTRAINT1D4_H
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
#include "DyArticulationUtils.h"
#include "DySolverConstraint1D.h"
namespace physx
{
namespace Dy
{
struct SolverConstraint1DHeader4
{
PxU8 type; // enum SolverConstraintType - must be first byte
PxU8 pad0[3];
//These counts are the max of the 4 sets of data.
//When certain pairs have fewer constraints than others, they are padded with 0s so that no work is performed but
//calculations are still shared (afterall, they're computationally free because we're doing 4 things at a time in SIMD)
PxU32 count;
PxU8 count0, count1, count2, count3;
PxU8 break0, break1, break2, break3;
Vec4V linBreakImpulse;
Vec4V angBreakImpulse;
Vec4V invMass0D0;
Vec4V invMass1D1;
Vec4V angD0;
Vec4V angD1;
Vec4V body0WorkOffsetX;
Vec4V body0WorkOffsetY;
Vec4V body0WorkOffsetZ;
};
struct SolverConstraint1DBase4
{
public:
Vec4V lin0X;
Vec4V lin0Y;
Vec4V lin0Z;
Vec4V ang0X;
Vec4V ang0Y;
Vec4V ang0Z;
Vec4V ang0WritebackX;
Vec4V ang0WritebackY;
Vec4V ang0WritebackZ;
Vec4V constant;
Vec4V unbiasedConstant;
Vec4V velMultiplier;
Vec4V impulseMultiplier;
Vec4V minImpulse;
Vec4V maxImpulse;
Vec4V appliedForce;
PxU32 flags[4];
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DBase4) == 272);
struct SolverConstraint1DDynamic4 : public SolverConstraint1DBase4
{
Vec4V lin1X;
Vec4V lin1Y;
Vec4V lin1Z;
Vec4V ang1X;
Vec4V ang1Y;
Vec4V ang1Z;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DDynamic4) == 368);
}
}
#endif //DY_SOLVERCONSTRAINT1D4_H

View File

@ -0,0 +1,224 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVER_CONSTRAINT_1D_STEP_H
#define DY_SOLVER_CONSTRAINT_1D_STEP_H
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
#include "DyArticulationUtils.h"
#include "DySolverConstraintTypes.h"
#include "DySolverBody.h"
#include "PxConstraintDesc.h"
#include "DySolverConstraintDesc.h"
namespace physx
{
namespace Dy
{
struct SolverContactHeaderStep
{
enum DySolverContactFlags
{
eHAS_FORCE_THRESHOLDS = 0x1
};
PxU8 type; //Note: mType should be first as the solver expects a type in the first byte.
PxU8 flags;
PxU8 numNormalConstr;
PxU8 numFrictionConstr; //4
PxReal angDom0; //8
PxReal angDom1; //12
PxReal invMass0; //16
Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W; //32
PxVec3 normal; //48
PxReal maxPenBias; //52
PxReal invMass1; //56
PxReal minNormalForce; //60
PxU32 broken; //64
PxU8* frictionBrokenWritebackByte; //68 72
Sc::ShapeInteraction* shapeInteraction; //72 80
#if !PX_P64_FAMILY
PxU32 pad[2]; //80
#endif
PX_FORCE_INLINE FloatV getStaticFriction() const { return V4GetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W); }
PX_FORCE_INLINE FloatV getDynamicFriction() const { return V4GetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W); }
PX_FORCE_INLINE FloatV getDominance0() const { return V4GetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W); }
PX_FORCE_INLINE FloatV getDominance1() const { return V4GetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W); }
};
struct SolverContactPointStep
{
PxVec3 raXnI;
PxF32 separation;
PxVec3 rbXnI;
PxF32 velMultiplier;
PxF32 targetVelocity;
PxF32 biasCoefficient;
//2 more slots here for extra data
PxU32 pad[2];
};
struct SolverContactPointStepExt : public SolverContactPointStep
{
Vec3V linDeltaVA;
Vec3V linDeltaVB;
Vec3V angDeltaVA;
Vec3V angDeltaVB;
};
struct SolverContactFrictionStep
{
Vec4V normalXYZ_ErrorW; //16
Vec4V raXnI_targetVelW;
Vec4V rbXnI_velMultiplierW;
PxReal biasScale;
PxReal appliedForce;
PxReal frictionScale;
PxU32 pad[1];
PX_FORCE_INLINE void setAppliedForce(const FloatV f) { FStore(f, &appliedForce); }
};
struct SolverContactFrictionStepExt : public SolverContactFrictionStep
{
Vec3V linDeltaVA;
Vec3V linDeltaVB;
Vec3V angDeltaVA;
Vec3V angDeltaVB;
};
struct SolverConstraint1DHeaderStep
{
PxU8 type; // enum SolverConstraintType - must be first byte
PxU8 count; // count of following 1D constraints
PxU8 dominance;
PxU8 breakable; // indicate whether this constraint is breakable or not
PxReal linBreakImpulse;
PxReal angBreakImpulse;
PxReal invMass0D0;
PxVec3 body0WorldOffset;
PxReal invMass1D1;
PxVec3 rAWorld;
PxReal linearInvMassScale0; // only used by articulations
PxVec3 rBWorld;
PxReal angularInvMassScale0;
PxReal linearInvMassScale1; // only used by articulations
PxReal angularInvMassScale1;
PxU32 pad[2];
//Ortho axes for body 0, recipResponse in W component
PxVec4 angOrthoAxis0_recipResponseW[3];
//Ortho axes for body 1, error of body in W component
PxVec4 angOrthoAxis1_Error[3];
};
PX_FORCE_INLINE void init(SolverConstraint1DHeaderStep& h,
PxU8 count,
bool isExtended,
const PxConstraintInvMassScale& ims)
{
h.type = PxU8(isExtended ? DY_SC_TYPE_EXT_1D : DY_SC_TYPE_RB_1D);
h.count = count;
h.dominance = 0;
h.linearInvMassScale0 = ims.linear0;
h.angularInvMassScale0 = ims.angular0;
h.linearInvMassScale1 = -ims.linear1;
h.angularInvMassScale1 = -ims.angular1;
}
PX_ALIGN_PREFIX(16)
struct SolverConstraint1DStep
{
public:
PxVec3 lin0; //!< linear velocity projection (body 0)
PxReal error; //!< constraint error term - must be scaled by biasScale. Can be adjusted at run-time
PxVec3 lin1; //!< linear velocity projection (body 1)
PxReal biasScale; //!< constraint constant bias scale. Constant
PxVec3 ang0; //!< angular velocity projection (body 0)
PxReal velMultiplier; //!< constraint velocity multiplier
PxVec3 ang1; //!< angular velocity projection (body 1)
PxReal impulseMultiplier; //!< constraint impulse multiplier
PxReal velTarget; //!< Scaled target velocity of the constraint drive
PxReal minImpulse; //!< Lower bound on impulse magnitude
PxReal maxImpulse; //!< Upper bound on impulse magnitude
PxReal appliedForce; //!< applied force to correct velocity+bias
PxReal maxBias;
PxU32 flags;
PxReal recipResponse; //Constant. Only used for articulations;
PxReal angularErrorScale; //Constant
} PX_ALIGN_SUFFIX(16);
struct SolverConstraint1DExtStep : public SolverConstraint1DStep
{
public:
Cm::SpatialVectorV deltaVA;
Cm::SpatialVectorV deltaVB;
};
PX_FORCE_INLINE void init(SolverConstraint1DStep& c,
const PxVec3& _linear0, const PxVec3& _linear1,
const PxVec3& _angular0, const PxVec3& _angular1,
PxReal _minImpulse, PxReal _maxImpulse)
{
PX_ASSERT(_linear0.isFinite());
PX_ASSERT(_linear1.isFinite());
c.lin0 = _linear0;
c.lin1 = _linear1;
c.ang0 = _angular0;
c.ang1 = _angular1;
c.minImpulse = _minImpulse;
c.maxImpulse = _maxImpulse;
c.flags = 0;
c.appliedForce = 0;
c.angularErrorScale = 1.f;
}
}//namespace Dy
}
#endif

View File

@ -0,0 +1,142 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONSTRAINTDESC_H
#define DY_SOLVERCONSTRAINTDESC_H
#include "PxvConfig.h"
#include "DySolverConstraintTypes.h"
#include "PsUtilities.h"
#include "PxConstraintDesc.h"
#include "solver/PxSolverDefs.h"
namespace physx
{
struct PxcNpWorkUnit;
struct PxsContactManagerOutput;
namespace Cm
{
class SpatialVector;
}
struct PxSolverBody;
struct PxSolverBodyData;
namespace Dy
{
struct FsData;
// dsequeira: moved this articulation stuff here to sever a build dep on Articulation.h through DyThreadContext.h and onward
struct SelfConstraintBlock
{
PxU32 startId;
PxU32 numSelfConstraints;
PxU16 fsDataLength;
PxU16 requiredSolverProgress;
uintptr_t eaFsData;
};
//This class rolls together multiple contact managers into a single contact manager.
struct CompoundContactManager
{
PxU32 mStartIndex;
PxU16 mStride;
PxU16 mReducedContactCount;
PxcNpWorkUnit* unit; //This is a work unit but the contact buffer has been adjusted to contain all the contacts for all the subsequent pairs
PxsContactManagerOutput* cmOutput;
PxU8* originalContactPatches; //This is the original contact buffer that we replaced with a combined buffer
PxU8* originalContactPoints;
PxU8 originalContactCount;
PxU8 originalPatchCount;
PxU8 originalStatusFlags;
PxReal* originalForceBuffer; //This is the original force buffer that we replaced with a combined force buffer
PxU16* forceBufferList; //This is a list of indices from the reduced force buffer to the original force buffers - we need this to fix up the write-backs from the solver
};
struct SolverConstraintPrepState
{
enum Enum
{
eOUT_OF_MEMORY,
eUNBATCHABLE,
eSUCCESS
};
};
PX_FORCE_INLINE bool isArticulationConstraint(const PxSolverConstraintDesc& desc)
{
return desc.linkIndexA != PxSolverConstraintDesc::NO_LINK ||
desc.linkIndexB != PxSolverConstraintDesc::NO_LINK;
}
PX_FORCE_INLINE void setConstraintLength(PxSolverConstraintDesc& desc, const PxU32 constraintLength)
{
PX_ASSERT(0==(constraintLength & 0x0f));
PX_ASSERT(constraintLength <= PX_MAX_U16 * 16);
desc.constraintLengthOver16 = Ps::to16(constraintLength >> 4);
}
PX_FORCE_INLINE void setWritebackLength(PxSolverConstraintDesc& desc, const PxU32 writeBackLength)
{
PX_ASSERT(0==(writeBackLength & 0x03));
PX_ASSERT(writeBackLength <= PX_MAX_U16 * 4);
desc.writeBackLengthOver4 = Ps::to16(writeBackLength >> 2);
}
PX_FORCE_INLINE PxU32 getConstraintLength(const PxSolverConstraintDesc& desc)
{
return PxU32(desc.constraintLengthOver16 << 4);
}
PX_FORCE_INLINE PxU32 getWritebackLength(const PxSolverConstraintDesc& desc)
{
return PxU32(desc.writeBackLengthOver4 << 2);
}
PX_COMPILE_TIME_ASSERT(0 == (0x0f & sizeof(PxSolverConstraintDesc)));
#define MAX_PERMITTED_SOLVER_PROGRESS 0xFFFF
}
}
#endif //DY_SOLVERCONSTRAINTDESC_H

View File

@ -0,0 +1,57 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVER_CONSTRAINT_EXT_SHARED_H
#define DY_SOLVER_CONSTRAINT_EXT_SHARED_H
#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"
#include "DyArticulationContactPrep.h"
#include "DySolverConstraintDesc.h"
#include "DySolverConstraint1D.h"
#include "DySolverContact.h"
#include "DySolverContactPF.h"
#include "DyArticulationHelper.h"
#include "PxcNpWorkUnit.h"
#include "PxsMaterialManager.h"
#include "PxsMaterialCombiner.h"
namespace physx
{
namespace Dy
{
FloatV setupExtSolverContact(const SolverExtBody& b0, const SolverExtBody& b1,
const FloatV& d0, const FloatV& d1, const FloatV& angD0, const FloatV& angD1, const Vec3V& bodyFrame0p, const Vec3V& bodyFrame1p,
const Vec3VArg normal, const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution,
const FloatVArg bounceThreshold, const Gu::ContactPoint& contact, SolverContactPointExt& solverContact, const FloatVArg ccdMaxSeparation,
Cm::SpatialVectorF* Z, const Cm::SpatialVectorV& v0, const Cm::SpatialVectorV& v1);
} //namespace Dy
}
#endif //DY_SOLVER_CONSTRAINT_EXT_SHARED_H

View File

@ -0,0 +1,72 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONSTRAINTTYPES_H
#define DY_SOLVERCONSTRAINTTYPES_H
#include "foundation/PxSimpleTypes.h"
#include "PxvConfig.h"
namespace physx
{
enum SolverConstraintType
{
DY_SC_TYPE_NONE = 0,
DY_SC_TYPE_RB_CONTACT, // RB-only contact
DY_SC_TYPE_RB_1D, // RB-only 1D constraint
DY_SC_TYPE_EXT_CONTACT, // contact involving articulations
DY_SC_TYPE_EXT_1D, // 1D constraint involving articulations
DY_SC_TYPE_STATIC_CONTACT, // RB-only contact where body b is static
DY_SC_TYPE_NOFRICTION_RB_CONTACT, //RB-only contact with no friction patch
DY_SC_TYPE_BLOCK_RB_CONTACT,
DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT,
DY_SC_TYPE_BLOCK_1D,
DY_SC_TYPE_FRICTION,
DY_SC_TYPE_STATIC_FRICTION,
DY_SC_TYPE_EXT_FRICTION,
DY_SC_TYPE_BLOCK_FRICTION,
DY_SC_TYPE_BLOCK_STATIC_FRICTION,
DY_SC_CONSTRAINT_TYPE_COUNT //Count of the number of different constraint types in the solver
};
enum SolverConstraintFlags
{
DY_SC_FLAG_OUTPUT_FORCE = (1<<1),
DY_SC_FLAG_KEEP_BIAS = (1<<2),
DY_SC_FLAG_ROT_EQ = (1<<3),
DY_SC_FLAG_ORTHO_TARGET = (1<<4),
DY_SC_FLAG_SPRING = (1<<5),
DY_SC_FLAG_INEQUALITY = (1<<6)
};
}
#endif //DY_SOLVERCONSTRAINTTYPES_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,171 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVER_CORE_SHARED_H
#define DY_SOLVER_CORE_SHARED_H
#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"
#include "CmPhysXCommon.h"
#include "DySolverBody.h"
#include "DySolverContact.h"
#include "DySolverConstraint1D.h"
#include "DySolverConstraintDesc.h"
#include "PsUtilities.h"
#include "DyConstraint.h"
#include "PsAtomic.h"
namespace physx
{
namespace Dy
{
PX_FORCE_INLINE static FloatV solveDynamicContacts(SolverContactPoint* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal,
const FloatVArg invMassA, const FloatVArg invMassB, const FloatVArg angDom0, const FloatVArg angDom1, Vec3V& linVel0_, Vec3V& angState0_,
Vec3V& linVel1_, Vec3V& angState1_, PxF32* PX_RESTRICT forceBuffer)
{
Vec3V linVel0 = linVel0_;
Vec3V angState0 = angState0_;
Vec3V linVel1 = linVel1_;
Vec3V angState1 = angState1_;
FloatV accumulatedNormalImpulse = FZero();
const Vec3V delLinVel0 = V3Scale(contactNormal, invMassA);
const Vec3V delLinVel1 = V3Scale(contactNormal, invMassB);
for(PxU32 i=0;i<nbContactPoints;i++)
{
SolverContactPoint& c = contacts[i];
Ps::prefetchLine(&contacts[i], 128);
const Vec3V raXn = c.raXn;
const Vec3V rbXn = c.rbXn;
const FloatV appliedForce = FLoad(forceBuffer[i]);
const FloatV velMultiplier = c.getVelMultiplier();
/*const FloatV targetVel = c.getTargetVelocity();
const FloatV nScaledBias = c.getScaledBias();*/
const FloatV maxImpulse = c.getMaxImpulse();
//Compute the normal velocity of the constraint.
const Vec3V v0 = V3MulAdd(linVel0, contactNormal, V3Mul(angState0, raXn));
const Vec3V v1 = V3MulAdd(linVel1, contactNormal, V3Mul(angState1, rbXn));
const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
const FloatV biasedErr = c.getBiasedErr();//FScaleAdd(targetVel, velMultiplier, nScaledBias);
//KS - clamp the maximum force
const FloatV _deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
const FloatV _newForce = FAdd(appliedForce, _deltaF);
const FloatV newForce = FMin(_newForce, maxImpulse);
const FloatV deltaF = FSub(newForce, appliedForce);
linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1);
FStore(newForce, &forceBuffer[i]);
accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newForce);
}
linVel0_ = linVel0;
angState0_ = angState0;
linVel1_ = linVel1;
angState1_ = angState1;
return accumulatedNormalImpulse;
}
PX_FORCE_INLINE static FloatV solveStaticContacts(SolverContactPoint* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal,
const FloatVArg invMassA, const FloatVArg angDom0, Vec3V& linVel0_, Vec3V& angState0_, PxF32* PX_RESTRICT forceBuffer)
{
Vec3V linVel0 = linVel0_;
Vec3V angState0 = angState0_;
FloatV accumulatedNormalImpulse = FZero();
const Vec3V delLinVel0 = V3Scale(contactNormal, invMassA);
for(PxU32 i=0;i<nbContactPoints;i++)
{
SolverContactPoint& c = contacts[i];
Ps::prefetchLine(&contacts[i],128);
const Vec3V raXn = c.raXn;
const FloatV appliedForce = FLoad(forceBuffer[i]);
const FloatV velMultiplier = c.getVelMultiplier();
/*const FloatV targetVel = c.getTargetVelocity();
const FloatV nScaledBias = c.getScaledBias();*/
const FloatV maxImpulse = c.getMaxImpulse();
const Vec3V v0 = V3MulAdd(linVel0, contactNormal, V3Mul(angState0, raXn));
const FloatV normalVel = V3SumElems(v0);
const FloatV biasedErr = c.getBiasedErr();//FScaleAdd(targetVel, velMultiplier, nScaledBias);
// still lots to do here: using loop pipelining we can interweave this code with the
// above - the code here has a lot of stalls that we would thereby eliminate
const FloatV _deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
const FloatV _newForce = FAdd(appliedForce, _deltaF);
const FloatV newForce = FMin(_newForce, maxImpulse);
const FloatV deltaF = FSub(newForce, appliedForce);
linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
FStore(newForce, &forceBuffer[i]);
accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newForce);
}
linVel0_ = linVel0;
angState0_ = angState0;
return accumulatedNormalImpulse;
}
FloatV solveExtContacts(SolverContactPointExt* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal,
Vec3V& linVel0, Vec3V& angVel0,
Vec3V& linVel1, Vec3V& angVel1,
Vec3V& li0, Vec3V& ai0,
Vec3V& li1, Vec3V& ai1,
PxF32* PX_RESTRICT appliedForceBuffer);
}
}
#endif //DY_SOLVER_CORE_SHARED_H

View File

@ -0,0 +1,228 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONTACT_H
#define DY_SOLVERCONTACT_H
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
#include "PsVecMath.h"
namespace physx
{
using namespace Ps::aos;
namespace Sc
{
class ShapeInteraction;
}
/**
\brief A header to represent a friction patch for the solver.
*/
namespace Dy
{
struct SolverContactHeader
{
enum DySolverContactFlags
{
eHAS_FORCE_THRESHOLDS = 0x1
};
PxU8 type; //Note: mType should be first as the solver expects a type in the first byte.
PxU8 flags;
PxU8 numNormalConstr;
PxU8 numFrictionConstr; //4
PxReal angDom0; //8
PxReal angDom1; //12
PxReal invMass0; //16
Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W; //32
//KS - minAppliedImpulseForFrictionW is non-zero only for articulations. This is a workaround for a case in articulations where
//the impulse is propagated such that many links do not apply friction because their normal forces were corrected by the solver in a previous
//link. This results in some links sliding unnaturally. This occurs with prismatic or revolute joints where the impulse propagation one one link
//resolves the normal constraint on all links
Vec4V normal_minAppliedImpulseForFrictionW; //48
PxReal invMass1; //52
PxU32 broken; //56
PxU8* frictionBrokenWritebackByte; //60 64
Sc::ShapeInteraction* shapeInteraction; //64 72
#if PX_P64_FAMILY
PxU32 pad[2]; //64 80
#endif // PX_X64
PX_FORCE_INLINE void setStaticFriction(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);}
PX_FORCE_INLINE void setDynamicFriction(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);}
PX_FORCE_INLINE void setDominance0(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);}
PX_FORCE_INLINE void setDominance1(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);}
PX_FORCE_INLINE FloatV getStaticFriction() const {return V4GetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
PX_FORCE_INLINE FloatV getDynamicFriction() const {return V4GetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
PX_FORCE_INLINE FloatV getDominance0() const {return V4GetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
PX_FORCE_INLINE FloatV getDominance1() const {return V4GetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
PX_FORCE_INLINE void setStaticFriction(PxF32 f) {V4WriteX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);}
PX_FORCE_INLINE void setDynamicFriction(PxF32 f) {V4WriteY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);}
PX_FORCE_INLINE void setDominance0(PxF32 f) {V4WriteZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);}
PX_FORCE_INLINE void setDominance1(PxF32 f) {V4WriteW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);}
PX_FORCE_INLINE PxF32 getStaticFrictionPxF32() const {return V4ReadX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
PX_FORCE_INLINE PxF32 getDynamicFrictionPxF32() const {return V4ReadY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
PX_FORCE_INLINE PxF32 getDominance0PxF32() const {return V4ReadZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
PX_FORCE_INLINE PxF32 getDominance1PxF32() const {return V4ReadW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
};
#if !PX_P64_FAMILY
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader) == 64);
#else
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader) == 80);
#endif
/**
\brief A single rigid body contact point for the solver.
*/
struct SolverContactPoint
{
Vec3V raXn;
Vec3V rbXn;
PxF32 velMultiplier;
PxF32 biasedErr;
PxF32 unbiasedErr;
PxF32 maxImpulse;
PX_FORCE_INLINE FloatV getVelMultiplier() const {return FLoad(velMultiplier);}
PX_FORCE_INLINE FloatV getBiasedErr() const {return FLoad(biasedErr);}
PX_FORCE_INLINE FloatV getMaxImpulse() const {return FLoad(maxImpulse);}
PX_FORCE_INLINE Vec3V getRaXn() const {return raXn;}
PX_FORCE_INLINE Vec3V getRbXn() const {return rbXn;}
PX_FORCE_INLINE void setRaXn(const PxVec3& v) {V3WriteXYZ(raXn, v);}
PX_FORCE_INLINE void setRbXn(const PxVec3& v) {V3WriteXYZ(rbXn, v);}
PX_FORCE_INLINE void setVelMultiplier(PxF32 f) {velMultiplier = f;}
PX_FORCE_INLINE void setBiasedErr(PxF32 f) {biasedErr = f;}
PX_FORCE_INLINE void setUnbiasedErr(PxF32 f) {unbiasedErr = f;}
PX_FORCE_INLINE PxF32 getVelMultiplierPxF32() const {return velMultiplier;}
PX_FORCE_INLINE const PxVec3& getRaXnPxVec3() const {return V3ReadXYZ(raXn);}
PX_FORCE_INLINE const PxVec3& getRbXnPxVec3() const {return V3ReadXYZ(rbXn);}
PX_FORCE_INLINE PxF32 getBiasedErrPxF32() const {return biasedErr;}
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactPoint) == 48);
/**
\brief A single extended articulation contact point for the solver.
*/
struct SolverContactPointExt : public SolverContactPoint
{
Vec3V linDeltaVA;
Vec3V angDeltaVA;
Vec3V linDeltaVB;
Vec3V angDeltaVB;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactPointExt) == 112);
/**
\brief A single friction constraint for the solver.
*/
struct SolverContactFriction
{
Vec4V normalXYZ_appliedForceW; //16
Vec4V raXnXYZ_velMultiplierW; //32
Vec4V rbXnXYZ_biasW; //48
PxReal targetVel; //52
PxU32 mPad[3]; //64
PX_FORCE_INLINE void setAppliedForce(const FloatV f) {normalXYZ_appliedForceW=V4SetW(normalXYZ_appliedForceW,f);}
PX_FORCE_INLINE void setVelMultiplier(const FloatV f) {raXnXYZ_velMultiplierW=V4SetW(raXnXYZ_velMultiplierW,f);}
PX_FORCE_INLINE void setBias(const FloatV f) {rbXnXYZ_biasW=V4SetW(rbXnXYZ_biasW,f);}
PX_FORCE_INLINE FloatV getAppliedForce() const {return V4GetW(normalXYZ_appliedForceW);}
PX_FORCE_INLINE FloatV getVelMultiplier() const {return V4GetW(raXnXYZ_velMultiplierW);}
PX_FORCE_INLINE FloatV getBias() const {return V4GetW(rbXnXYZ_biasW);}
PX_FORCE_INLINE Vec3V getNormal() const {return Vec3V_From_Vec4V(normalXYZ_appliedForceW);}
PX_FORCE_INLINE Vec3V getRaXn() const {return Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);}
PX_FORCE_INLINE Vec3V getRbXn() const {return Vec3V_From_Vec4V(rbXnXYZ_biasW);}
PX_FORCE_INLINE void setNormal(const PxVec3& v) {V4WriteXYZ(normalXYZ_appliedForceW, v);}
PX_FORCE_INLINE void setRaXn(const PxVec3& v) {V4WriteXYZ(raXnXYZ_velMultiplierW, v);}
PX_FORCE_INLINE void setRbXn(const PxVec3& v) {V4WriteXYZ(rbXnXYZ_biasW, v);}
PX_FORCE_INLINE const PxVec3& getNormalPxVec3() const {return V4ReadXYZ(normalXYZ_appliedForceW);}
PX_FORCE_INLINE const PxVec3& getRaXnPxVec3() const {return V4ReadXYZ(raXnXYZ_velMultiplierW);}
PX_FORCE_INLINE const PxVec3& getRbXnPxVec3() const {return V4ReadXYZ(rbXnXYZ_biasW);}
PX_FORCE_INLINE void setAppliedForce(PxF32 f) {V4WriteW(normalXYZ_appliedForceW, f);}
PX_FORCE_INLINE void setVelMultiplier(PxF32 f) {V4WriteW(raXnXYZ_velMultiplierW, f);}
PX_FORCE_INLINE void setBias(PxF32 f) {V4WriteW(rbXnXYZ_biasW, f);}
PX_FORCE_INLINE PxF32 getAppliedForcePxF32() const {return V4ReadW(normalXYZ_appliedForceW);}
PX_FORCE_INLINE PxF32 getVelMultiplierPxF32() const {return V4ReadW(raXnXYZ_velMultiplierW);}
PX_FORCE_INLINE PxF32 getBiasPxF32() const {return V4ReadW(rbXnXYZ_biasW);}
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFriction) == 64);
/**
\brief A single extended articulation friction constraint for the solver.
*/
struct SolverContactFrictionExt : public SolverContactFriction
{
Vec3V linDeltaVA;
Vec3V angDeltaVA;
Vec3V linDeltaVB;
Vec3V angDeltaVB;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionExt) == 128);
}
}
#endif //DY_SOLVERCONTACT_H

View File

@ -0,0 +1,179 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONTACT4_H
#define DY_SOLVERCONTACT4_H
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
#include "PsVecMath.h"
#include "DySolverContact.h"
namespace physx
{
struct PxcNpWorkUnit;
struct PxSolverBody;
struct PxSolverBodyData;
struct PxSolverConstraintDesc;
namespace Sc
{
class ShapeInteraction;
}
namespace Dy
{
/**
\brief Batched SOA contact data. Note, we don't support batching with extended contacts for the simple reason that handling multiple articulations would be complex.
*/
struct SolverContactHeader4
{
enum
{
eHAS_MAX_IMPULSE = 1 << 0,
eHAS_TARGET_VELOCITY = 1 << 1
};
PxU8 type; //Note: mType should be first as the solver expects a type in the first byte.
PxU8 numNormalConstr;
PxU8 numFrictionConstr;
PxU8 flag;
PxU8 flags[4];
//These counts are the max of the 4 sets of data.
//When certain pairs have fewer patches/contacts than others, they are padded with 0s so that no work is performed but
//calculations are still shared (afterall, they're computationally free because we're doing 4 things at a time in SIMD)
//KS - used for write-back only
PxU8 numNormalConstr0, numNormalConstr1, numNormalConstr2, numNormalConstr3;
PxU8 numFrictionConstr0, numFrictionConstr1, numFrictionConstr2, numFrictionConstr3;
Vec4V restitution;
Vec4V staticFriction;
Vec4V dynamicFriction;
//Technically, these mass properties could be pulled out into a new structure and shared. For multi-manifold contacts,
//this would save 64 bytes per-manifold after the cost of the first manifold
Vec4V invMass0D0;
Vec4V invMass1D1;
Vec4V angDom0;
Vec4V angDom1;
//Normal is shared between all contacts in the batch. This will save some memory!
Vec4V normalX;
Vec4V normalY;
Vec4V normalZ;
Sc::ShapeInteraction* shapeInteraction[4]; //192 or 208
};
#if !PX_P64_FAMILY
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader4) == 192);
#else
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader4) == 208);
#endif
/**
\brief This represents a batch of 4 contacts with static rolled into a single structure
*/
struct SolverContactBatchPointBase4
{
Vec4V raXnX;
Vec4V raXnY;
Vec4V raXnZ;
Vec4V velMultiplier;
Vec4V scaledBias;
Vec4V biasedErr;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactBatchPointBase4) == 96);
/**
\brief Contains the additional data required to represent 4 contacts between 2 dynamic bodies
@see SolverContactBatchPointBase4
*/
struct SolverContactBatchPointDynamic4 : public SolverContactBatchPointBase4
{
Vec4V rbXnX;
Vec4V rbXnY;
Vec4V rbXnZ;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactBatchPointDynamic4) == 144);
/**
\brief This represents the shared information of a batch of 4 friction constraints
*/
struct SolverFrictionSharedData4
{
BoolV broken;
PxU8* frictionBrokenWritebackByte[4];
Vec4V normalX[2];
Vec4V normalY[2];
Vec4V normalZ[2];
};
#if !PX_P64_FAMILY
PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionSharedData4) == 128);
#endif
/**
\brief This represents a batch of 4 friction constraints with static rolled into a single structure
*/
struct SolverContactFrictionBase4
{
Vec4V raXnX;
Vec4V raXnY;
Vec4V raXnZ;
Vec4V scaledBias;
Vec4V velMultiplier;
Vec4V targetVelocity;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionBase4) == 96);
/**
\brief Contains the additional data required to represent 4 friction constraints between 2 dynamic bodies
@see SolverContactFrictionBase4
*/
struct SolverContactFrictionDynamic4 : public SolverContactFrictionBase4
{
Vec4V rbXnX;
Vec4V rbXnY;
Vec4V rbXnZ;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionDynamic4) == 144);
}
}
#endif //DY_SOLVERCONTACT4_H

View File

@ -0,0 +1,123 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONTACTPF_H
#define DY_SOLVERCONTACTPF_H
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
#include "PsVecMath.h"
namespace physx
{
using namespace Ps::aos;
namespace Dy
{
struct SolverContactCoulombHeader
{
PxU8 type; //Note: mType should be first as the solver expects a type in the first byte.
PxU8 numNormalConstr;
PxU16 frictionOffset; //4
//PxF32 restitution;
PxF32 angDom0; //8
PxF32 dominance0; //12
PxF32 dominance1; //16
PX_ALIGN(16, PxVec3 normalXYZ); //28
PxF32 angDom1; //32
Sc::ShapeInteraction* shapeInteraction; //36 40
PxU8 flags; //37 41
PxU8 pad0[3]; //40 44
#if !PX_P64_FAMILY
PxU32 pad1[2]; //48
#else
PxU32 pad1; // 48
#endif
PX_FORCE_INLINE void setDominance0(const FloatV f) {FStore(f, &dominance0);}
PX_FORCE_INLINE void setDominance1(const FloatV f) {FStore(f, &dominance1);}
PX_FORCE_INLINE void setNormal(const Vec3V n) {V3StoreA(n, normalXYZ);}
PX_FORCE_INLINE FloatV getDominance0() const {return FLoad(dominance0);}
PX_FORCE_INLINE FloatV getDominance1() const {return FLoad(dominance1);}
//PX_FORCE_INLINE FloatV getRestitution() const {return FLoad(restitution);}
PX_FORCE_INLINE Vec3V getNormal()const {return V3LoadA(normalXYZ);}
PX_FORCE_INLINE void setDominance0(PxF32 f) { dominance0 = f; }
PX_FORCE_INLINE void setDominance1(PxF32 f) { dominance1 = f;}
//PX_FORCE_INLINE void setRestitution(PxF32 f) { restitution = f;}
PX_FORCE_INLINE PxF32 getDominance0PxF32() const {return dominance0;}
PX_FORCE_INLINE PxF32 getDominance1PxF32() const {return dominance1;}
//PX_FORCE_INLINE PxF32 getRestitutionPxF32() const {return restitution;}
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader) == 48);
struct SolverFrictionHeader
{
PxU8 type; //Note: mType should be first as the solver expects a type in the first byte.
PxU8 numNormalConstr;
PxU8 numFrictionConstr;
PxU8 flags;
PxF32 staticFriction;
PxF32 invMass0D0;
PxF32 invMass1D1;
PxF32 angDom0;
PxF32 angDom1;
PxU32 pad2[2];
PX_FORCE_INLINE void setStaticFriction(const FloatV f) {FStore(f, &staticFriction);}
PX_FORCE_INLINE FloatV getStaticFriction() const {return FLoad(staticFriction);}
PX_FORCE_INLINE void setStaticFriction(PxF32 f) {staticFriction = f;}
PX_FORCE_INLINE PxF32 getStaticFrictionPxF32() const {return staticFriction;}
PX_FORCE_INLINE PxU32 getAppliedForcePaddingSize() const {return sizeof(PxU32)*((4 * ((numNormalConstr + 3)/4)));}
static PX_FORCE_INLINE PxU32 getAppliedForcePaddingSize(const PxU32 numConstr) {return sizeof(PxU32)*((4 * ((numConstr + 3)/4)));}
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionHeader) == 32);
}
}
#endif //DY_SOLVERCONTACTPF_H

View File

@ -0,0 +1,155 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVER_CONTACT_PF_4_H
#define DY_SOLVER_CONTACT_PF_4_H
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
#include "PsVecMath.h"
namespace physx
{
using namespace Ps::aos;
namespace Sc
{
class ShapeInteraction;
}
namespace Dy
{
struct SolverContactCoulombHeader4
{
PxU8 type; //Note: mType should be first as the solver expects a type in the first byte.
PxU8 numNormalConstr;
PxU16 frictionOffset;
PxU8 numNormalConstr0, numNormalConstr1, numNormalConstr2, numNormalConstr3;
PxU8 flags[4];
PxU32 pad; //16
Vec4V restitution; //32
Vec4V normalX; //48
Vec4V normalY; //64
Vec4V normalZ; //80
Vec4V invMassADom; //96
Vec4V invMassBDom; //112
Vec4V angD0; //128
Vec4V angD1; //144
Sc::ShapeInteraction* shapeInteraction[4]; //160 or 176
};
#if !PX_P64_FAMILY
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader4) == 160);
#else
PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader4) == 176);
#endif
struct SolverContact4Base
{
Vec4V raXnX;
Vec4V raXnY;
Vec4V raXnZ;
Vec4V appliedForce;
Vec4V velMultiplier;
Vec4V targetVelocity;
Vec4V scaledBias;
Vec4V maxImpulse;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContact4Base) == 128);
struct SolverContact4Dynamic : public SolverContact4Base
{
Vec4V rbXnX;
Vec4V rbXnY;
Vec4V rbXnZ;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverContact4Dynamic) == 176);
struct SolverFrictionHeader4
{
PxU8 type; //Note: mType should be first as the solver expects a type in the first byte.
PxU8 numNormalConstr;
PxU8 numFrictionConstr;
PxU8 numNormalConstr0;
PxU8 numNormalConstr1;
PxU8 numNormalConstr2;
PxU8 numNormalConstr3;
PxU8 numFrictionConstr0;
PxU8 numFrictionConstr1;
PxU8 numFrictionConstr2;
PxU8 numFrictionConstr3;
PxU8 pad0;
PxU32 frictionPerContact;
Vec4V staticFriction;
Vec4V invMassADom;
Vec4V invMassBDom;
Vec4V angD0;
Vec4V angD1;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionHeader4) == 96);
struct SolverFriction4Base
{
Vec4V normalX;
Vec4V normalY;
Vec4V normalZ;
Vec4V raXnX;
Vec4V raXnY;
Vec4V raXnZ;
Vec4V appliedForce;
Vec4V velMultiplier;
Vec4V targetVelocity;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverFriction4Base) == 144);
struct SolverFriction4Dynamic : public SolverFriction4Base
{
Vec4V rbXnX;
Vec4V rbXnY;
Vec4V rbXnZ;
};
PX_COMPILE_TIME_ASSERT(sizeof(SolverFriction4Dynamic) == 192);
}
}
#endif //DY_SOLVER_CONTACT_PF_4_H

View File

@ -0,0 +1,66 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONTEXT_H
#define DY_SOLVERCONTEXT_H
namespace physx
{
struct PxSolverBodyData;
namespace Dy
{
struct ThresholdStreamElement;
struct SolverContext
{
bool doFriction;
bool writeBackIteration;
// for threshold stream output
ThresholdStreamElement* mThresholdStream;
PxU32 mThresholdStreamIndex;
PxU32 mThresholdStreamLength;
PxSolverBodyData* solverBodyArray;
ThresholdStreamElement* PX_RESTRICT mSharedThresholdStream;
PxU32 mSharedThresholdStreamLength;
PxI32* mSharedOutThresholdPairs;
Cm::SpatialVectorF* Z;
Cm::SpatialVectorF* deltaV;
};
}
}
#endif //DY_SOLVERCONTEXT_H

View File

@ -0,0 +1,753 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "PsAllocator.h"
#include <new>
#include <stdio.h>
#include "CmPhysXCommon.h"
#include "DySolverBody.h"
#include "DySolverConstraint1D.h"
#include "DySolverContact.h"
#include "DyThresholdTable.h"
#include "DySolverControl.h"
#include "DyArticulationHelper.h"
#include "PsAtomic.h"
#include "PsIntrinsics.h"
#include "DyArticulationPImpl.h"
#include "PsThread.h"
#include "DySolverConstraintDesc.h"
#include "DySolverContext.h"
namespace physx
{
namespace Dy
{
//-----------------------------------
void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExtContactBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExt1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContact_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1D4_Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExtContactConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExt1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContact_BStaticConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1D4Block_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExtContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExt1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContact_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1D4Block_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void contactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void extContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void ext1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void contactPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void writeBack1D4Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
// could move this to PxPreprocessor.h but
// no implementation available for MSVC
#if PX_GCC_FAMILY
#define PX_UNUSED_ATTRIBUTE __attribute__((unused))
#else
#define PX_UNUSED_ATTRIBUTE
#endif
#define DYNAMIC_ARTICULATION_REGISTRATION(x) 0
static SolveBlockMethod gVTableSolveBlock[] PX_UNUSED_ATTRIBUTE =
{
0,
solveContactBlock, // DY_SC_TYPE_RB_CONTACT
solve1DBlock, // DY_SC_TYPE_RB_1D
DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactBlock), // DY_SC_TYPE_EXT_CONTACT
DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlock), // DY_SC_TYPE_EXT_1D
solveContact_BStaticBlock, // DY_SC_TYPE_STATIC_CONTACT
solveContactBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
solveContactPreBlock, // DY_SC_TYPE_BLOCK_RB_CONTACT
solveContactPreBlock_Static, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
solve1D4_Block, // DY_SC_TYPE_BLOCK_1D,
};
static SolveWriteBackBlockMethod gVTableSolveWriteBackBlock[] PX_UNUSED_ATTRIBUTE =
{
0,
solveContactBlockWriteBack, // DY_SC_TYPE_RB_CONTACT
solve1DBlockWriteBack, // DY_SC_TYPE_RB_1D
DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactBlockWriteBack), // DY_SC_TYPE_EXT_CONTACT
DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlockWriteBack), // DY_SC_TYPE_EXT_1D
solveContact_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_CONTACT
solveContactBlockWriteBack, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
solveContactPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_RB_CONTACT
solveContactPreBlock_WriteBackStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
solve1D4Block_WriteBack, // DY_SC_TYPE_BLOCK_1D,
};
static SolveBlockMethod gVTableSolveConcludeBlock[] PX_UNUSED_ATTRIBUTE =
{
0,
solveContactConcludeBlock, // DY_SC_TYPE_RB_CONTACT
solve1DConcludeBlock, // DY_SC_TYPE_RB_1D
DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactConcludeBlock), // DY_SC_TYPE_EXT_CONTACT
DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DConcludeBlock), // DY_SC_TYPE_EXT_1D
solveContact_BStaticConcludeBlock, // DY_SC_TYPE_STATIC_CONTACT
solveContactConcludeBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
solveContactPreBlock_Conclude, // DY_SC_TYPE_BLOCK_RB_CONTACT
solveContactPreBlock_ConcludeStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
solve1D4Block_Conclude, // DY_SC_TYPE_BLOCK_1D,
};
void SolverCoreRegisterArticulationFns()
{
gVTableSolveBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactBlock;
gVTableSolveBlock[DY_SC_TYPE_EXT_1D] = solveExt1DBlock;
gVTableSolveWriteBackBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactBlockWriteBack;
gVTableSolveWriteBackBlock[DY_SC_TYPE_EXT_1D] = solveExt1DBlockWriteBack;
gVTableSolveConcludeBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactConcludeBlock;
gVTableSolveConcludeBlock[DY_SC_TYPE_EXT_1D] = solveExt1DConcludeBlock;
}
SolveBlockMethod* getSolveBlockTable()
{
return gVTableSolveBlock;
}
SolveBlockMethod* getSolverConcludeBlockTable()
{
return gVTableSolveConcludeBlock;
}
SolveWriteBackBlockMethod* getSolveWritebackBlockTable()
{
return gVTableSolveWriteBackBlock;
}
SolverCoreGeneral* SolverCoreGeneral::create(bool fricEveryIteration)
{
SolverCoreGeneral* scg = reinterpret_cast<SolverCoreGeneral*>(
PX_ALLOC(sizeof(SolverCoreGeneral), "SolverCoreGeneral"));
if (scg)
{
new (scg) SolverCoreGeneral;
scg->frictionEveryIteration = fricEveryIteration;
}
return scg;
}
void SolverCoreGeneral::destroyV()
{
this->~SolverCoreGeneral();
PX_FREE(this);
}
void SolverCoreGeneral::solveV_Blocks(SolverIslandParams& params) const
{
const PxI32 TempThresholdStreamSize = 32;
ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
SolverContext cache;
cache.solverBodyArray = params.bodyDataList;
cache.mThresholdStream = tempThresholdStream;
cache.mThresholdStreamLength = TempThresholdStreamSize;
cache.mThresholdStreamIndex = 0;
cache.writeBackIteration = false;
cache.Z = params.Z;
cache.deltaV = params.deltaV;
const PxI32 batchCount = PxI32(params.numConstraintHeaders);
PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
const PxU32 bodyListSize = params.bodyListSize;
Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
const PxU32 velocityIterations = params.velocityIterations;
const PxU32 positionIterations = params.positionIterations;
const PxU32 numConstraintHeaders = params.numConstraintHeaders;
const PxU32 articulationListSize = params.articulationListSize;
ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
PX_ASSERT(velocityIterations >= 1);
PX_ASSERT(positionIterations >= 1);
if(numConstraintHeaders == 0)
{
for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
{
Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
const PxSolverBody& atom = bodyListStart[baIdx];
motionVel.linear = atom.linearVelocity;
motionVel.angular = atom.angularState;
}
//Even thought there are no external constraints, there may still be internal constraints in the articulations...
for(PxU32 i = 0; i < positionIterations; ++i)
for (PxU32 j = 0; j < articulationListSize; ++j)
articulationListStart[j].articulation->solveInternalConstraints(params.dt, params.invDt, cache.Z, cache.deltaV, false, false, 0.f);
for (PxU32 i = 0; i < articulationListSize; i++)
ArticulationPImpl::saveVelocity(articulationListStart[i], cache.deltaV);
for (PxU32 i = 0; i < velocityIterations; ++i)
for (PxU32 j = 0; j < articulationListSize; ++j)
articulationListStart[j].articulation->solveInternalConstraints(params.dt, params.invDt, cache.Z, cache.deltaV, true, false, 0.f);
for (PxU32 j = 0; j < articulationListSize; ++j)
articulationListStart[j].articulation->writebackInternalConstraints(false);
return;
}
BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders);
PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
//0-(n-1) iterations
PxI32 normalIter = 0;
for (PxU32 iteration = positionIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters
{
cache.doFriction = this->frictionEveryIteration ? true : iteration <= 3;
SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount,
cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlock : gVTableSolveBlock, normalIter);
for (PxU32 i = 0; i < articulationListSize; ++i)
articulationListStart[i].articulation->solveInternalConstraints(params.dt, params.invDt, cache.Z, cache.deltaV, false, false, 0.f);
++normalIter;
}
for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
{
const PxSolverBody& atom = bodyListStart[baIdx];
Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
motionVel.linear = atom.linearVelocity;
motionVel.angular = atom.angularState;
}
for (PxU32 i = 0; i < articulationListSize; i++)
ArticulationPImpl::saveVelocity(articulationListStart[i], cache.deltaV);
const PxI32 velItersMinOne = (PxI32(velocityIterations)) - 1;
PxI32 iteration = 0;
for(; iteration < velItersMinOne; ++iteration)
{
SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount,
cache, contactIterator, gVTableSolveBlock, normalIter);
for (PxU32 i = 0; i < articulationListSize; ++i)
articulationListStart[i].articulation->solveInternalConstraints(params.dt, params.invDt, cache.Z, cache.deltaV, true, false, 0.f);
++normalIter;
}
PxI32* outThresholdPairs = params.outThresholdPairs;
ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
PxU32 thresholdStreamLength = params.thresholdStreamLength;
cache.writeBackIteration = true;
cache.mSharedThresholdStream = thresholdStream;
cache.mSharedThresholdStreamLength = thresholdStreamLength;
cache.mSharedOutThresholdPairs = outThresholdPairs;
//PGS solver always runs at least one velocity iteration (otherwise writeback won't happen)
{
SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount,
cache, contactIterator, gVTableSolveWriteBackBlock, normalIter);
for (PxU32 i = 0; i < articulationListSize; ++i)
{
articulationListStart[i].articulation->solveInternalConstraints(params.dt, params.invDt, cache.Z, cache.deltaV, true, false, 0.f);
articulationListStart[i].articulation->writebackInternalConstraints(false);
}
++normalIter;
}
//Write back remaining threshold streams
if(cache.mThresholdStreamIndex > 0)
{
//Write back to global buffer
PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
{
thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
}
cache.mThresholdStreamIndex = 0;
}
}
PxI32 SolverCoreGeneral::solveVParallelAndWriteBack
(SolverIslandParams& params, Cm::SpatialVectorF* Z, Cm::SpatialVectorF* deltaV) const
{
#if PX_PROFILE_SOLVE_STALLS
PxU64 startTime = readTimer();
PxU64 stallCount = 0;
#endif
SolverContext cache;
cache.solverBodyArray = params.bodyDataList;
const PxU32 batchSize = params.batchSize;
const PxI32 UnrollCount = PxI32(batchSize);
const PxI32 ArticCount = 2;
const PxI32 SaveUnrollCount = 32;
const PxI32 TempThresholdStreamSize = 32;
ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
const PxI32 bodyListSize = PxI32(params.bodyListSize);
const PxI32 articulationListSize = PxI32(params.articulationListSize);
const PxI32 batchCount = PxI32(params.numConstraintHeaders);
cache.mThresholdStream = tempThresholdStream;
cache.mThresholdStreamLength = TempThresholdStreamSize;
cache.mThresholdStreamIndex = 0;
cache.writeBackIteration = false;
cache.Z = Z;
cache.deltaV = deltaV;
const PxReal dt = params.dt;
const PxReal invDt = params.invDt;
const PxI32 positionIterations = PxI32(params.positionIterations);
const PxI32 velocityIterations = PxI32(params.velocityIterations);
PxI32* constraintIndex = &params.constraintIndex;
PxI32* constraintIndex2 = &params.constraintIndex2;
PxI32* articIndex = &params.articSolveIndex;
PxI32* articIndex2 = &params.articSolveIndex2;
PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
const PxU32 nbPartitions = params.nbPartitions;
PxU32* headersPerPartition = params.headersPerPartition;
PX_UNUSED(velocityIterations);
PX_ASSERT(velocityIterations >= 1);
PX_ASSERT(positionIterations >= 1);
PxI32 endIndexCount = UnrollCount;
PxI32 index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
PxI32 articSolveStart = 0;
PxI32 articSolveEnd = 0;
PxI32 maxArticIndex = 0;
PxI32 articIndexCounter = 0;
BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders);
PxI32 maxNormalIndex = 0;
PxI32 normalIteration = 0;
PxU32 a = 0;
PxI32 targetConstraintIndex = 0;
PxI32 targetArticIndex = 0;
for(PxU32 i = 0; i < 2; ++i)
{
SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlock : gVTableSolveConcludeBlock;
for(; a < positionIterations - 1 + i; ++a)
{
WAIT_FOR_PROGRESS(articIndex2, targetArticIndex);
cache.doFriction = this->frictionEveryIteration ? true : (positionIterations - a) <= 3;
for(PxU32 b = 0; b < nbPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
maxNormalIndex += headersPerPartition[b];
PxI32 nbSolved = 0;
while(index < maxNormalIndex)
{
const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, solveTable,
normalIteration);
index += remainder;
endIndexCount -= remainder;
nbSolved += remainder;
if(endIndexCount == 0)
{
endIndexCount = UnrollCount;
index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
physx::shdfnd::atomicAdd(constraintIndex2, nbSolved);
}
targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
}
WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
maxArticIndex += articulationListSize;
targetArticIndex += articulationListSize;
while (articSolveStart < maxArticIndex)
{
const PxI32 endIdx = PxMin(articSolveEnd, maxArticIndex);
PxI32 nbSolved = 0;
while (articSolveStart < endIdx)
{
articulationListStart[articSolveStart - articIndexCounter].articulation->solveInternalConstraints(dt, invDt, cache.Z, cache.deltaV, false, false, 0.f);
articSolveStart++;
nbSolved++;
}
if (nbSolved)
{
physx::shdfnd::atomicAdd(articIndex2, nbSolved);
}
const PxI32 remaining = articSolveEnd - articSolveStart;
if (remaining == 0)
{
articSolveStart = physx::shdfnd::atomicAdd(articIndex, ArticCount) - ArticCount;
articSolveEnd = articSolveStart + ArticCount;
}
}
articIndexCounter += articulationListSize;
++normalIteration;
}
}
PxI32* bodyListIndex = &params.bodyListIndex;
PxI32* bodyListIndex2 = &params.bodyListIndex2;
PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
//Save velocity - articulated
PxI32 endIndexCount2 = SaveUnrollCount;
PxI32 index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
{
WAIT_FOR_PROGRESS(articIndex2, targetArticIndex);
WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
PxI32 nbConcluded = 0;
while(index2 < articulationListSize)
{
const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2));
endIndexCount2 -= remainder;
for(PxI32 b = 0; b < remainder; ++b, ++index2)
{
ArticulationPImpl::saveVelocity(articulationListStart[index2], cache.deltaV);
}
if(endIndexCount2 == 0)
{
index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
endIndexCount2 = SaveUnrollCount;
}
nbConcluded += remainder;
}
index2 -= articulationListSize;
//save velocity
while(index2 < bodyListSize)
{
const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2));
endIndexCount2 -= remainder;
for(PxI32 b = 0; b < remainder; ++b, ++index2)
{
Ps::prefetchLine(&bodyListStart[index2 + 8]);
Ps::prefetchLine(&motionVelocityArray[index2 + 8]);
PxSolverBody& body = bodyListStart[index2];
Cm::SpatialVector& motionVel = motionVelocityArray[index2];
motionVel.linear = body.linearVelocity;
motionVel.angular = body.angularState;
PX_ASSERT(motionVel.linear.isFinite());
PX_ASSERT(motionVel.angular.isFinite());
}
nbConcluded += remainder;
//Branch not required because this is the last time we use this atomic variable
//if(index2 < articulationListSizePlusbodyListSize)
{
index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize;
endIndexCount2 = SaveUnrollCount;
}
}
if(nbConcluded)
{
Ps::memoryBarrier();
physx::shdfnd::atomicAdd(bodyListIndex2, nbConcluded);
}
}
WAIT_FOR_PROGRESS(bodyListIndex2, (bodyListSize + articulationListSize));
a = 1;
for(; a < params.velocityIterations; ++a)
{
WAIT_FOR_PROGRESS(articIndex2, targetArticIndex);
for(PxU32 b = 0; b < nbPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
maxNormalIndex += headersPerPartition[b];
PxI32 nbSolved = 0;
while(index < maxNormalIndex)
{
const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlock,
normalIteration);
index += remainder;
endIndexCount -= remainder;
nbSolved += remainder;
if(endIndexCount == 0)
{
endIndexCount = UnrollCount;
index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
physx::shdfnd::atomicAdd(constraintIndex2, nbSolved);
}
targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
}
WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
maxArticIndex += articulationListSize;
targetArticIndex += articulationListSize;
while (articSolveStart < maxArticIndex)
{
const PxI32 endIdx = PxMin(articSolveEnd, maxArticIndex);
PxI32 nbSolved = 0;
while (articSolveStart < endIdx)
{
articulationListStart[articSolveStart - articIndexCounter].articulation->solveInternalConstraints(dt, invDt, cache.Z, cache.deltaV, true, false, 0.f);
articSolveStart++;
nbSolved++;
}
if (nbSolved)
{
physx::shdfnd::atomicAdd(articIndex2, nbSolved);
}
const PxI32 remaining = articSolveEnd - articSolveStart;
if (remaining == 0)
{
articSolveStart = physx::shdfnd::atomicAdd(articIndex, ArticCount) - ArticCount;
articSolveEnd = articSolveStart + ArticCount;
}
}
++normalIteration;
articIndexCounter += articulationListSize;
}
ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
PxU32 thresholdStreamLength = params.thresholdStreamLength;
PxI32* outThresholdPairs = params.outThresholdPairs;
cache.mSharedOutThresholdPairs = outThresholdPairs;
cache.mSharedThresholdStream = thresholdStream;
cache.mSharedThresholdStreamLength = thresholdStreamLength;
//Last iteration - do writeback as well!
cache.writeBackIteration = true;
{
WAIT_FOR_PROGRESS(articIndex2, targetArticIndex);
for(PxU32 b = 0; b < nbPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
maxNormalIndex += headersPerPartition[b];
PxI32 nbSolved = 0;
while(index < maxNormalIndex)
{
const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveWriteBackBlock,
normalIteration);
index += remainder;
endIndexCount -= remainder;
nbSolved += remainder;
if(endIndexCount == 0)
{
endIndexCount = UnrollCount;
index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
physx::shdfnd::atomicAdd(constraintIndex2, nbSolved);
}
targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
}
{
WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
maxArticIndex += articulationListSize;
targetArticIndex += articulationListSize;
while (articSolveStart < maxArticIndex)
{
const PxI32 endIdx = PxMin(articSolveEnd, maxArticIndex);
PxI32 nbSolved = 0;
while (articSolveStart < endIdx)
{
articulationListStart[articSolveStart - articIndexCounter].articulation->solveInternalConstraints(dt, invDt, cache.Z, cache.deltaV, false, false, 0.f);
articulationListStart[articSolveStart - articIndexCounter].articulation->writebackInternalConstraints(false);
articSolveStart++;
nbSolved++;
}
if (nbSolved)
{
physx::shdfnd::atomicAdd(articIndex2, nbSolved);
}
PxI32 remaining = articSolveEnd - articSolveStart;
if (remaining == 0)
{
articSolveStart = physx::shdfnd::atomicAdd(articIndex, ArticCount) - ArticCount;
articSolveEnd = articSolveStart + ArticCount;
}
}
}
if(cache.mThresholdStreamIndex > 0)
{
//Write back to global buffer
PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
{
thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
}
cache.mThresholdStreamIndex = 0;
}
++normalIteration;
}
#if PX_PROFILE_SOLVE_STALLS
PxU64 endTime = readTimer();
PxReal totalTime = (PxReal)(endTime - startTime);
PxReal stallTime = (PxReal)stallCount;
PxReal stallRatio = stallTime/totalTime;
if(0)//stallRatio > 0.2f)
{
LARGE_INTEGER frequency;
QueryPerformanceFrequency( &frequency );
printf("Warning -- percentage time stalled = %f; stalled for %f seconds; total Time took %f seconds\n",
stallRatio * 100.f, stallTime/(PxReal)frequency.QuadPart, totalTime/(PxReal)frequency.QuadPart);
}
#endif
return normalIteration * batchCount;
}
void SolverCoreGeneral::writeBackV
(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 /*constraintListSize*/, PxConstraintBatchHeader* batchHeaders, const PxU32 numBatches,
ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const
{
SolverContext cache;
cache.solverBodyArray = atomListData;
cache.mThresholdStream = thresholdStream;
cache.mThresholdStreamLength = thresholdStreamLength;
cache.mThresholdStreamIndex = 0;
PxI32 outThreshIndex = 0;
for(PxU32 j = 0; j < numBatches; ++j)
{
PxU8 type = *constraintList[batchHeaders[j].startIndex].constraint;
writeBackTable[type](constraintList + batchHeaders[j].startIndex,
batchHeaders[j].stride, cache);
}
outThresholdPairs = PxU32(outThreshIndex);
}
void solveVBlock(SOLVEV_BLOCK_METHOD_ARGS)
{
solverCore->solveV_Blocks(params);
}
}
}
//#endif

View File

@ -0,0 +1,161 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCOREGENERAL_H
#define DY_SOLVERCOREGENERAL_H
#include "DySolverCore.h"
#include "DySolverConstraintDesc.h"
namespace physx
{
namespace Dy
{
struct FsData;
inline void BusyWaitState(volatile PxU32* state, const PxU32 requiredState)
{
while(requiredState != *state );
}
inline void WaitBodyRequiredState(PxU32* state, const PxU32 requiredState)
{
if(*state != requiredState)
{
BusyWaitState(state, requiredState);
}
}
inline void BusyWaitStates(volatile PxU32* stateA, volatile PxU32* stateB, const PxU32 requiredStateA, const PxU32 requiredStateB)
{
while(*stateA != requiredStateA);
while(*stateB != requiredStateB);
}
class BatchIterator
{
public:
PxConstraintBatchHeader* constraintBatchHeaders;
PxU32 mSize;
PxU32 mCurrentIndex;
BatchIterator(PxConstraintBatchHeader* _constraintBatchHeaders, PxU32 size) : constraintBatchHeaders(_constraintBatchHeaders),
mSize(size), mCurrentIndex(0)
{
}
PX_FORCE_INLINE const PxConstraintBatchHeader& GetCurrentHeader(const PxU32 constraintIndex)
{
PxU32 currentIndex = mCurrentIndex;
while((constraintIndex - constraintBatchHeaders[currentIndex].startIndex) >= constraintBatchHeaders[currentIndex].stride)
currentIndex = (currentIndex + 1)%mSize;
Ps::prefetchLine(&constraintBatchHeaders[currentIndex], 128);
mCurrentIndex = currentIndex;
return constraintBatchHeaders[currentIndex];
}
private:
BatchIterator& operator=(const BatchIterator&);
};
inline void SolveBlockParallel (PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxI32 batchCount, const PxI32 index,
const PxI32 headerCount, SolverContext& cache, BatchIterator& iterator,
SolveBlockMethod solveTable[],
const PxI32 iteration
)
{
const PxI32 indA = index - (iteration * headerCount);
const PxConstraintBatchHeader* PX_RESTRICT headers = iterator.constraintBatchHeaders;
const PxI32 endIndex = indA + batchCount;
for(PxI32 i = indA; i < endIndex; ++i)
{
const PxConstraintBatchHeader& header = headers[i];
const PxI32 numToGrab = header.stride;
PxSolverConstraintDesc* PX_RESTRICT block = &constraintList[header.startIndex];
Ps::prefetch(block[0].constraint, 384);
for(PxI32 b = 0; b < numToGrab; ++b)
{
Ps::prefetchLine(block[b].bodyA);
Ps::prefetchLine(block[b].bodyB);
}
//OK. We have a number of constraints to run...
solveTable[header.constraintType](block, PxU32(numToGrab), cache);
}
}
class SolverCoreGeneral : public SolverCore
{
public:
bool frictionEveryIteration;
static SolverCoreGeneral* create(bool fricEveryIteration);
// Implements SolverCore
virtual void destroyV();
virtual PxI32 solveVParallelAndWriteBack
(SolverIslandParams& params, Cm::SpatialVectorF* Z, Cm::SpatialVectorF* deltaV) const;
virtual void solveV_Blocks
(SolverIslandParams& params) const;
virtual void writeBackV
(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numBatches,
ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const;
private:
//~Implements SolverCore
};
#define SOLVEV_BLOCK_METHOD_ARGS SolverCore* solverCore, SolverIslandParams& params
void solveVBlock(SOLVEV_BLOCK_METHOD_ARGS);
SolveBlockMethod* getSolveBlockTable();
SolveBlockMethod* getSolverConcludeBlockTable();
SolveWriteBackBlockMethod* getSolveWritebackBlockTable();
}
}
#endif //DY_SOLVERCOREGENERAL_H

View File

@ -0,0 +1,742 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "PsAllocator.h"
#include <new>
#include <stdio.h>
#include "CmPhysXCommon.h"
#include "DySolverBody.h"
#include "DySolverConstraint1D.h"
#include "DySolverContact.h"
#include "DyThresholdTable.h"
#include "DySolverControl.h"
#include "DyArticulationHelper.h"
#include "PsAtomic.h"
#include "PsIntrinsics.h"
#include "DyArticulationPImpl.h"
#include "PsThread.h"
#include "DySolverConstraintDesc.h"
#include "DySolverContext.h"
#include "DySolverControlPF.h"
namespace physx
{
namespace Dy
{
//-----------------------------------
void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExt1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1D4_Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExt1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1D4Block_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExt1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solve1D4Block_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void ext1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void writeBack1D4Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFriction_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExtFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExtContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulomb_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExtContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulomb_BStaticConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExtContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulomb_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFriction_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveExtFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
//Pre-block 1d/2d friction stuff...
void solveContactCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulombPreBlock_ConcludeStatic (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveContactCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFrictionCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFrictionCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFrictionCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFrictionCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFrictionCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
void solveFrictionCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
// could move this to PxPreprocessor.h but
// no implementation available for MSVC
#if PX_GCC_FAMILY
#define PX_UNUSED_ATTRIBUTE __attribute__((unused))
#else
#define PX_UNUSED_ATTRIBUTE
#endif
#define DYNAMIC_ARTICULATION_REGISTRATION(x) 0
static SolveBlockMethod gVTableSolveBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
{
0,
solveContactCoulombBlock, // DY_SC_TYPE_RB_CONTACT
solve1DBlock, // DY_SC_TYPE_RB_1D
DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlock), // DY_SC_TYPE_EXT_CONTACT
DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlock), // DY_SC_TYPE_EXT_1D
solveContactCoulomb_BStaticBlock, // DY_SC_TYPE_STATIC_CONTACT
solveContactCoulombBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
solveContactCoulombPreBlock, // DY_SC_TYPE_BLOCK_RB_CONTACT
solveContactCoulombPreBlock_Static, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
solve1D4_Block, // DY_SC_TYPE_BLOCK_1D,
solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT
solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT
solveFrictionCoulombPreBlock, // DY_SC_TYPE_BLOCK_FRICTION
solveFrictionCoulombPreBlock_Static // DY_SC_TYPE_BLOCK_STATIC_FRICTION
};
static SolveWriteBackBlockMethod gVTableSolveWriteBackBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
{
0,
solveContactCoulombBlockWriteBack, // DY_SC_TYPE_RB_CONTACT
solve1DBlockWriteBack, // DY_SC_TYPE_RB_1D
DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlockWriteBack), // DY_SC_TYPE_EXT_CONTACT
DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlockWriteBack), // DY_SC_TYPE_EXT_1D
solveContactCoulomb_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_CONTACT
solveContactCoulombBlockWriteBack, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
solveContactCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_RB_CONTACT
solveContactCoulombPreBlock_WriteBackStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
solve1D4Block_WriteBack, // DY_SC_TYPE_BLOCK_1D,
solveFrictionBlockWriteBack, // DY_SC_TYPE_FRICTION_CONSTRAINT
solveFriction_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlockWriteBack), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT
solveFrictionCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_FRICTION
solveFrictionCoulombPreBlock_WriteBackStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION
};
static SolveBlockMethod gVTableSolveConcludeBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
{
0,
solveContactCoulombConcludeBlock, // DY_SC_TYPE_RB_CONTACT
solve1DConcludeBlock, // DY_SC_TYPE_RB_1D
DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombConcludeBlock), // DY_SC_TYPE_EXT_CONTACT
DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DConcludeBlock), // DY_SC_TYPE_EXT_1D
solveContactCoulomb_BStaticConcludeBlock, // DY_SC_TYPE_STATIC_CONTACT
solveContactCoulombConcludeBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
solveContactCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_RB_CONTACT
solveContactCoulombPreBlock_ConcludeStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
solve1D4Block_Conclude, // DY_SC_TYPE_BLOCK_1D,
solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT
solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT
solveFrictionCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_FRICTION
solveFrictionCoulombPreBlock_ConcludeStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION
};
void SolverCoreRegisterArticulationFnsCoulomb()
{
gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombBlock;
gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DBlock;
gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombBlockWriteBack;
gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DBlockWriteBack;
gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombConcludeBlock;
gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DConcludeBlock;
gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock;
gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlockWriteBack;
gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock;
}
SolverCoreGeneralPF* SolverCoreGeneralPF::create()
{
SolverCoreGeneralPF* scg = reinterpret_cast<SolverCoreGeneralPF*>(
PX_ALLOC(sizeof(SolverCoreGeneralPF), "SolverCoreGeneral"));
if(scg)
new (scg) SolverCoreGeneralPF;
return scg;
}
void SolverCoreGeneralPF::destroyV()
{
this->~SolverCoreGeneralPF();
PX_FREE(this);
}
void SolverCoreGeneralPF::solveV_Blocks(SolverIslandParams& params) const
{
const PxI32 TempThresholdStreamSize = 32;
ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
SolverContext cache;
cache.solverBodyArray = params.bodyDataList;
cache.mThresholdStream = tempThresholdStream;
cache.mThresholdStreamLength = TempThresholdStreamSize;
cache.mThresholdStreamIndex = 0;
cache.writeBackIteration = false;
cache.deltaV = params.deltaV;
cache.Z = params.Z;
PxI32 batchCount = PxI32(params.numConstraintHeaders);
PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
const PxU32 bodyListSize = params.bodyListSize;
Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
const PxU32 velocityIterations = params.velocityIterations;
const PxU32 positionIterations = params.positionIterations;
const PxU32 numConstraintHeaders = params.numConstraintHeaders;
const PxU32 articulationListSize = params.articulationListSize;
ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
PX_ASSERT(velocityIterations >= 1);
PX_ASSERT(positionIterations >= 1);
if(numConstraintHeaders == 0)
{
for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
{
Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
PxSolverBody& atom = bodyListStart[baIdx];
motionVel.linear = atom.linearVelocity;
motionVel.angular = atom.angularState;
}
for (PxU32 i = 0; i < articulationListSize; i++)
ArticulationPImpl::saveVelocity(articulationListStart[i], cache.deltaV);
return;
}
BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders);
BatchIterator frictionIterator(params.frictionConstraintBatches, params.numFrictionConstraintHeaders);
const PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders);
PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList;
//0-(n-1) iterations
PxI32 normalIter = 0;
PxI32 frictionIter = 0;
for (PxU32 iteration = positionIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters
{
SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount,
cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter);
++normalIter;
}
if(frictionBatchCount>0)
{
const PxU32 numIterations = positionIterations * 2;
for (PxU32 iteration = numIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters
{
SolveBlockParallel(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
cache, frictionIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, frictionIter);
++frictionIter;
}
}
for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
{
const PxSolverBody& atom = bodyListStart[baIdx];
Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
motionVel.linear = atom.linearVelocity;
motionVel.angular = atom.angularState;
}
for (PxU32 i = 0; i < articulationListSize; i++)
ArticulationPImpl::saveVelocity(articulationListStart[i], cache.deltaV);
const PxU32 velItersMinOne = velocityIterations - 1;
PxU32 iteration = 0;
for(; iteration < velItersMinOne; ++iteration)
{
SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount,
cache, contactIterator, gVTableSolveBlockCoulomb, normalIter);
++normalIter;
if(frictionBatchCount > 0)
{
SolveBlockParallel(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
cache, frictionIterator, gVTableSolveBlockCoulomb, frictionIter);
++frictionIter;
}
}
PxI32* outThresholdPairs = params.outThresholdPairs;
ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
PxU32 thresholdStreamLength = params.thresholdStreamLength;
cache.writeBackIteration = true;
cache.mSharedOutThresholdPairs = outThresholdPairs;
cache.mSharedThresholdStreamLength = thresholdStreamLength;
cache.mSharedThresholdStream = thresholdStream;
//PGS always runs one velocity iteration
{
SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount,
cache, contactIterator, gVTableSolveWriteBackBlockCoulomb, normalIter);
++normalIter;
if(frictionBatchCount > 0)
{
SolveBlockParallel(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
cache, frictionIterator, gVTableSolveWriteBackBlockCoulomb, frictionIter);
++frictionIter;
}
}
//Write back remaining threshold streams
if(cache.mThresholdStreamIndex > 0)
{
//Write back to global buffer
const PxI32 threshIndex = physx::shdfnd::atomicAdd(reinterpret_cast<PxI32*>(&outThresholdPairs), PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
{
thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
}
cache.mThresholdStreamIndex = 0;
}
}
PxI32 SolverCoreGeneralPF::solveVParallelAndWriteBack(SolverIslandParams& params,
Cm::SpatialVectorF* Z, Cm::SpatialVectorF* deltaV) const
{
SolverContext cache;
cache.solverBodyArray = params.bodyDataList;
const PxI32 UnrollCount = PxI32(params.batchSize);
const PxI32 SaveUnrollCount = 64;
const PxI32 TempThresholdStreamSize = 32;
ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
const PxI32 batchCount = PxI32(params.numConstraintHeaders);
const PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders);//frictionConstraintBatches.size();
cache.mThresholdStream = tempThresholdStream;
cache.mThresholdStreamLength = TempThresholdStreamSize;
cache.mThresholdStreamIndex = 0;
cache.Z = Z;
cache.deltaV = deltaV;
const PxI32 positionIterations = PxI32(params.positionIterations);
const PxU32 velocityIterations = params.velocityIterations;
const PxI32 bodyListSize = PxI32(params.bodyListSize);
const PxI32 articulationListSize = PxI32(params.articulationListSize);
PX_ASSERT(velocityIterations >= 1);
PX_ASSERT(positionIterations >= 1);
PxI32* constraintIndex = &params.constraintIndex;
PxI32* constraintIndex2 = &params.constraintIndex2;
PxI32* frictionConstraintIndex = &params.frictionConstraintIndex;
PxI32 endIndexCount = UnrollCount;
PxI32 index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
PxI32 frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders);
BatchIterator frictionIter(params.frictionConstraintBatches, params.numFrictionConstraintHeaders);
PxU32* headersPerPartition = params.headersPerPartition;
PxU32 nbPartitions = params.nbPartitions;
PxU32* frictionHeadersPerPartition = params.frictionHeadersPerPartition;
PxU32 nbFrictionPartitions = params.nbFrictionPartitions;
PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList;
PxI32 maxNormalIndex = 0;
PxI32 maxProgress = 0;
PxI32 frictionEndIndexCount = UnrollCount;
PxI32 maxFrictionIndex = 0;
PxI32 normalIteration = 0;
PxI32 frictionIteration = 0;
PxU32 a = 0;
for(PxU32 i = 0; i < 2; ++i)
{
SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb;
for(; a < positionIterations - 1 + i; ++a)
{
for(PxU32 b = 0; b < nbPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
maxNormalIndex += headersPerPartition[b];
maxProgress += headersPerPartition[b];
PxI32 nbSolved = 0;
while(index < maxNormalIndex)
{
const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, solveTable,
normalIteration);
index += remainder;
endIndexCount -= remainder;
nbSolved += remainder;
if(endIndexCount == 0)
{
endIndexCount = UnrollCount;
index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
Ps::atomicAdd(constraintIndex2, nbSolved);
}
}
++normalIteration;
}
}
for(PxU32 i = 0; i < 2; ++i)
{
SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb;
const PxI32 numIterations = positionIterations *2;
for(; a < numIterations - 1 + i; ++a)
{
for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
maxProgress += frictionHeadersPerPartition[b];
maxFrictionIndex += frictionHeadersPerPartition[b];
PxI32 nbSolved = 0;
while(frictionIndex < maxFrictionIndex)
{
const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
SolveBlockParallel(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter,
solveTable, frictionIteration);
frictionIndex += remainder;
frictionEndIndexCount -= remainder;
nbSolved += remainder;
if(frictionEndIndexCount == 0)
{
frictionEndIndexCount = UnrollCount;
frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
Ps::atomicAdd(constraintIndex2, nbSolved);
}
}
++frictionIteration;
}
}
WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
PxI32* bodyListIndex = &params.bodyListIndex;
ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
PxI32* bodyListIndex2 = &params.bodyListIndex2;
PxI32 endIndexCount2 = SaveUnrollCount;
PxI32 index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
{
PxI32 nbConcluded = 0;
while(index2 < articulationListSize)
{
const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2));
endIndexCount2 -= remainder;
for(PxI32 b = 0; b < remainder; ++b, ++index2)
{
ArticulationPImpl::saveVelocity(articulationListStart[index2], cache.deltaV);
}
nbConcluded += remainder;
if(endIndexCount2 == 0)
{
index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
endIndexCount2 = SaveUnrollCount;
}
nbConcluded += remainder;
}
index2 -= articulationListSize;
//save velocity
while(index2 < bodyListSize)
{
const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2));
endIndexCount2 -= remainder;
for(PxI32 b = 0; b < remainder; ++b, ++index2)
{
Ps::prefetchLine(&bodyListStart[index2 + 8]);
Ps::prefetchLine(&motionVelocityArray[index2 + 8]);
PxSolverBody& body = bodyListStart[index2];
Cm::SpatialVector& motionVel = motionVelocityArray[index2];
motionVel.linear = body.linearVelocity;
motionVel.angular = body.angularState;
PX_ASSERT(motionVel.linear.isFinite());
PX_ASSERT(motionVel.angular.isFinite());
}
nbConcluded += remainder;
//Branch not required because this is the last time we use this atomic variable
//if(index2 < articulationListSizePlusbodyListSize)
{
index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize;
endIndexCount2 = SaveUnrollCount;
}
}
if(nbConcluded)
{
Ps::memoryBarrier();
physx::shdfnd::atomicAdd(bodyListIndex2, nbConcluded);
}
}
WAIT_FOR_PROGRESS(bodyListIndex2, (bodyListSize + articulationListSize));
a = 0;
for(; a < velocityIterations-1; ++a)
{
for(PxU32 b = 0; b < nbPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
maxNormalIndex += headersPerPartition[b];
maxProgress += headersPerPartition[b];
PxI32 nbSolved = 0;
while(index < maxNormalIndex)
{
const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlockCoulomb, normalIteration);
index += remainder;
endIndexCount -= remainder;
nbSolved += remainder;
if(endIndexCount == 0)
{
endIndexCount = UnrollCount;
index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
Ps::atomicAdd(constraintIndex2, nbSolved);
}
}
++normalIteration;
for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
maxFrictionIndex += frictionHeadersPerPartition[b];
maxProgress += frictionHeadersPerPartition[b];
PxI32 nbSolved = 0;
while(frictionIndex < maxFrictionIndex)
{
const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlockCoulomb,
normalIteration);
frictionIndex += remainder;
frictionEndIndexCount -= remainder;
nbSolved += remainder;
if(frictionEndIndexCount == 0)
{
frictionEndIndexCount = UnrollCount;
frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
Ps::atomicAdd(constraintIndex2, nbSolved);
}
}
++frictionIteration;
}
ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
const PxU32 thresholdStreamLength = params.thresholdStreamLength;
PxI32* outThresholdPairs = params.outThresholdPairs;
cache.mSharedThresholdStream = thresholdStream;
cache.mSharedOutThresholdPairs = outThresholdPairs;
cache.mSharedThresholdStreamLength = thresholdStreamLength;
{
for(PxU32 b = 0; b < nbPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
maxNormalIndex += headersPerPartition[b];
maxProgress += headersPerPartition[b];
PxI32 nbSolved = 0;
while(index < maxNormalIndex)
{
const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
SolveBlockParallel(constraintList, remainder, normalIteration * batchCount, batchCount,
cache, contactIter, gVTableSolveWriteBackBlockCoulomb, normalIteration);
index += remainder;
endIndexCount -= remainder;
nbSolved += remainder;
if(endIndexCount == 0)
{
endIndexCount = UnrollCount;
index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
Ps::atomicAdd(constraintIndex2, nbSolved);
}
}
++normalIteration;
cache.mSharedOutThresholdPairs = outThresholdPairs;
cache.mSharedThresholdStream = thresholdStream;
cache.mSharedThresholdStreamLength = thresholdStreamLength;
for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
{
WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
maxFrictionIndex += frictionHeadersPerPartition[b];
maxProgress += frictionHeadersPerPartition[b];
PxI32 nbSolved = 0;
while(frictionIndex < maxFrictionIndex)
{
const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
SolveBlockParallel(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter,
gVTableSolveWriteBackBlockCoulomb, frictionIteration);
frictionIndex += remainder;
frictionEndIndexCount -= remainder;
nbSolved += remainder;
if(frictionEndIndexCount == 0)
{
frictionEndIndexCount = UnrollCount;
frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
}
}
if(nbSolved)
{
Ps::memoryBarrier();
Ps::atomicAdd(constraintIndex2, nbSolved);
}
}
if(cache.mThresholdStreamIndex > 0)
{
//Write back to global buffer
PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
{
thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
}
cache.mThresholdStreamIndex = 0;
}
++frictionIteration;
}
return normalIteration * batchCount + frictionIteration * frictionBatchCount;
}
void SolverCoreGeneralPF::writeBackV
(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 /*constraintListSize*/, PxConstraintBatchHeader* batchHeaders, const PxU32 numBatches,
ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const
{
SolverContext cache;
cache.solverBodyArray = atomListData;
cache.mThresholdStream = thresholdStream;
cache.mThresholdStreamLength = thresholdStreamLength;
cache.mThresholdStreamIndex = 0;
PxI32 outThreshIndex = 0;
for(PxU32 j = 0; j < numBatches; ++j)
{
PxU8 type = *constraintList[batchHeaders[j].startIndex].constraint;
writeBackTable[type](constraintList + batchHeaders[j].startIndex,
batchHeaders[j].stride, cache);
}
outThresholdPairs = PxU32(outThreshIndex);
}
}
}
//#endif

View File

@ -0,0 +1,71 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCONTROLPF_H
#define DY_SOLVERCONTROLPF_H
#include "DySolverCore.h"
#include "DySolverConstraintDesc.h"
namespace physx
{
namespace Dy
{
class SolverCoreGeneralPF : public SolverCore
{
public:
static SolverCoreGeneralPF* create();
// Implements SolverCore
virtual void destroyV();
virtual PxI32 solveVParallelAndWriteBack
(SolverIslandParams& params, Cm::SpatialVectorF* Z, Cm::SpatialVectorF* deltaV) const;
virtual void solveV_Blocks
(SolverIslandParams& params) const;
virtual void writeBackV
(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numBatches,
ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const;
private:
//~Implements SolverCore
};
}
}
#endif //DY_SOLVERCOREGENERALPF_H

View File

@ -0,0 +1,251 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVERCORE_H
#define DY_SOLVERCORE_H
#include "PxvConfig.h"
#include "PsArray.h"
#include "PsThread.h"
namespace physx
{
struct PxSolverBody;
struct PxSolverBodyData;
struct PxSolverConstraintDesc;
struct PxConstraintBatchHeader;
namespace Dy
{
struct ThresholdStreamElement;
struct ArticulationSolverDesc;
class Articulation;
struct SolverContext;
typedef void (*WriteBackMethod)(const PxSolverConstraintDesc& desc, SolverContext& cache, PxSolverBodyData& sbd0, PxSolverBodyData& sbd1);
typedef void (*SolveMethod)(const PxSolverConstraintDesc& desc, SolverContext& cache);
typedef void (*SolveBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache);
typedef void (*SolveWriteBackBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache);
typedef void (*WriteBackBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache);
#define PX_PROFILE_SOLVE_STALLS 0
#if PX_PROFILE_SOLVE_STALLS
#if PX_WINDOWS
#include <windows.h>
PX_FORCE_INLINE PxU64 readTimer()
{
//return __rdtsc();
LARGE_INTEGER i;
QueryPerformanceCounter(&i);
return i.QuadPart;
}
#endif
#endif
#define YIELD_THREADS 1
#if YIELD_THREADS
#define ATTEMPTS_BEFORE_BACKOFF 30000
#define ATTEMPTS_BEFORE_RETEST 10000
#endif
PX_INLINE void WaitForProgressCount(volatile PxI32* pGlobalIndex, const PxI32 targetIndex)
{
#if YIELD_THREADS
if(*pGlobalIndex < targetIndex)
{
bool satisfied = false;
PxU32 count = ATTEMPTS_BEFORE_BACKOFF;
do
{
satisfied = true;
while(*pGlobalIndex < targetIndex)
{
if(--count == 0)
{
satisfied = false;
break;
}
}
if(!satisfied)
Ps::Thread::yield();
count = ATTEMPTS_BEFORE_RETEST;
}
while(!satisfied);
}
#else
while(*pGlobalIndex < targetIndex);
#endif
}
#if PX_PROFILE_SOLVE_STALLS
PX_INLINE void WaitForProgressCount(volatile PxI32* pGlobalIndex, const PxI32 targetIndex, PxU64& stallTime)
{
if(*pGlobalIndex < targetIndex)
{
bool satisfied = false;
PxU32 count = ATTEMPTS_BEFORE_BACKOFF;
do
{
satisfied = true;
PxU64 startTime = readTimer();
while(*pGlobalIndex < targetIndex)
{
if(--count == 0)
{
satisfied = false;
break;
}
}
PxU64 endTime = readTimer();
stallTime += (endTime - startTime);
if(!satisfied)
Ps::Thread::yield();
count = ATTEMPTS_BEFORE_BACKOFF;
}
while(!satisfied);
}
}
#define WAIT_FOR_PROGRESS(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex, stallCount)
#else
#define WAIT_FOR_PROGRESS(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex)
#endif
#define WAIT_FOR_PROGRESS_NO_TIMER(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex)
struct SolverIslandParams
{
//Default friction model params
PxU32 positionIterations;
PxU32 velocityIterations;
PxSolverBody* PX_RESTRICT bodyListStart;
PxSolverBodyData* PX_RESTRICT bodyDataList;
PxU32 bodyListSize;
PxU32 solverBodyOffset;
ArticulationSolverDesc* PX_RESTRICT articulationListStart;
PxU32 articulationListSize;
PxSolverConstraintDesc* PX_RESTRICT constraintList;
PxConstraintBatchHeader* constraintBatchHeaders;
PxU32 numConstraintHeaders;
PxU32* headersPerPartition;
PxU32 nbPartitions;
Cm::SpatialVector* PX_RESTRICT motionVelocityArray;
PxU32 batchSize;
PxsBodyCore*const* bodyArray;
PxsRigidBody** PX_RESTRICT rigidBodies;
//Shared state progress counters
PxI32 constraintIndex;
PxI32 constraintIndex2;
PxI32 bodyListIndex;
PxI32 bodyListIndex2;
PxI32 articSolveIndex;
PxI32 articSolveIndex2;
PxI32 bodyIntegrationListIndex;
PxI32 numObjectsIntegrated;
PxReal dt;
PxReal invDt;
//Additional 1d/2d friction model params
PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList;
PxConstraintBatchHeader* frictionConstraintBatches;
PxU32 numFrictionConstraintHeaders;
PxU32* frictionHeadersPerPartition;
PxU32 nbFrictionPartitions;
//Additional Shared state progress counters
PxI32 frictionConstraintIndex;
//Write-back threshold information
ThresholdStreamElement* PX_RESTRICT thresholdStream;
PxU32 thresholdStreamLength;
PxI32* outThresholdPairs;
PxU32 mMaxArticulationLinks;
Cm::SpatialVectorF* Z;
Cm::SpatialVectorF* deltaV;
};
/*!
Interface to constraint solver cores
*/
class SolverCore
{
public:
virtual void destroyV() = 0;
virtual ~SolverCore() {}
/*
solves dual problem exactly by GS-iterating until convergence stops
only uses regular velocity vector for storing results, and backs up initial state, which is restored.
the solution forces are saved in a vector.
state should not be stored, this function is safe to call from multiple threads.
Returns the total number of constraints that should be solved across all threads. Used for synchronization outside of this method
*/
virtual PxI32 solveVParallelAndWriteBack
(SolverIslandParams& params, Cm::SpatialVectorF* Z, Cm::SpatialVectorF* deltaV) const = 0;
virtual void solveV_Blocks
(SolverIslandParams& params) const = 0;
virtual void writeBackV
(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numConstraintBatches,
ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const = 0;
};
}
}
#endif //DY_SOLVERCORE_H

View File

@ -0,0 +1,95 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SOLVEREXTBODY_H
#define DY_SOLVEREXTBODY_H
#include "foundation/PxVec3.h"
#include "foundation/PxTransform.h"
#include "CmPhysXCommon.h"
#include "CmSpatialVector.h"
#include "PsVecMath.h"
namespace physx
{
class PxsRigidBody;
struct PxsBodyCore;
struct PxSolverBody;
struct PxSolverBodyData;
namespace Dy
{
class ArticulationV;
struct SolverConstraint1D;
class SolverExtBody
{
public:
union
{
const ArticulationV* mArticulation;
const PxSolverBody* mBody;
};
const PxSolverBodyData* mBodyData;
PxU16 mLinkIndex;
SolverExtBody(const void* bodyOrArticulation, const void* bodyData, PxU16 linkIndex):
mBody(reinterpret_cast<const PxSolverBody*>(bodyOrArticulation)),
mBodyData(reinterpret_cast<const PxSolverBodyData*>(bodyData)),
mLinkIndex(linkIndex)
{}
void getResponse(const PxVec3& linImpulse, const PxVec3& angImpulse,
PxVec3& linDeltaV, PxVec3& angDeltaV, PxReal dominance) const;
void getResponse(const Ps::aos::Vec3V& linImpulse, const Ps::aos::Vec3V& angImpulse,
Ps::aos::Vec3V& linDeltaV, Ps::aos::Vec3V& angDeltaV, Ps::aos::FloatV dominance) const;
PxReal projectVelocity(const PxVec3& linear, const PxVec3& angular) const;
Ps::aos::FloatV projectVelocity(const Ps::aos::Vec3V& linear, const Ps::aos::Vec3V& angular) const;
PxVec3 getLinVel() const;
PxVec3 getAngVel() const;
Ps::aos::Vec3V getLinVelV() const;
Ps::aos::Vec3V getAngVelV() const;
Cm::SpatialVectorV getVelocity() const;
};
}
}
#endif //DY_SOLVEREXTBODY_H

View File

@ -0,0 +1,879 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"
#include "CmPhysXCommon.h"
#include "DySolverBody.h"
#include "DySolverContact.h"
#include "DySolverContactPF.h"
#include "DySolverConstraint1D.h"
#include "DySolverConstraintDesc.h"
#include "DyThresholdTable.h"
#include "DySolverContext.h"
#include "PsUtilities.h"
#include "DyConstraint.h"
#include "PsAtomic.h"
#include "DyThresholdTable.h"
#include "DySolverConstraintsShared.h"
namespace physx
{
namespace Dy
{
void solveContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
PxSolverBody& b0 = *desc.bodyA;
PxSolverBody& b1 = *desc.bodyB;
Vec3V linVel0 = V3LoadA(b0.linearVelocity);
Vec3V linVel1 = V3LoadA(b1.linearVelocity);
Vec3V angState0 = V3LoadA(b0.angularState);
Vec3V angState1 = V3LoadA(b1.angularState);
SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint);
const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc);
//hopefully pointer aliasing doesn't bite.
PxU8* PX_RESTRICT currPtr = desc.constraint;
//const FloatV zero = FZero();
while(currPtr < last)
{
SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr);
currPtr += sizeof(SolverContactCoulombHeader);
const PxU32 numNormalConstr = hdr->numNormalConstr;
const Vec3V normal = hdr->getNormal();
const FloatV invMassDom0 = FLoad(hdr->dominance0);
const FloatV invMassDom1 = FLoad(hdr->dominance1);
const FloatV angD0 = FLoad(hdr->angDom0);
const FloatV angD1 = FLoad(hdr->angDom1);
SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
currPtr += numNormalConstr * sizeof(SolverContactPoint);
PxF32* appliedImpulse = reinterpret_cast<PxF32*> ((reinterpret_cast<PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader));
Ps::prefetchLine(appliedImpulse);
solveDynamicContacts(contacts, numNormalConstr, normal, invMassDom0, invMassDom1,
angD0, angD1, linVel0, angState0, linVel1, angState1, appliedImpulse);
}
// Write back
V3StoreA(linVel0, b0.linearVelocity);
V3StoreA(linVel1, b1.linearVelocity);
V3StoreA(angState0, b0.angularState);
V3StoreA(angState1, b1.angularState);
PX_ASSERT(currPtr == last);
}
void solveFriction(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
PxSolverBody& b0 = *desc.bodyA;
PxSolverBody& b1 = *desc.bodyB;
Vec3V linVel0 = V3LoadA(b0.linearVelocity);
Vec3V linVel1 = V3LoadA(b1.linearVelocity);
Vec3V angState0 = V3LoadA(b0.angularState);
Vec3V angState1 = V3LoadA(b1.angularState);
PxU8* PX_RESTRICT ptr = desc.constraint;
PxU8* PX_RESTRICT currPtr = ptr;
const PxU8* PX_RESTRICT last = ptr + getConstraintLength(desc);
while(currPtr < last)
{
const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr);
currPtr += sizeof(SolverFrictionHeader);
PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr);
currPtr += frictionHeader->getAppliedForcePaddingSize();
SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
const PxU32 numNormalConstr = frictionHeader->numNormalConstr;
const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr;
currPtr += numFrictionConstr * sizeof(SolverContactFriction);
const FloatV staticFriction = frictionHeader->getStaticFriction();
const FloatV invMass0D0 = FLoad(frictionHeader->invMass0D0);
const FloatV invMass1D1 = FLoad(frictionHeader->invMass1D1);
const FloatV angD0 = FLoad(frictionHeader->angDom0);
const FloatV angD1 = FLoad(frictionHeader->angDom1);
for(PxU32 i=0, j = 0;i<numFrictionConstr;j++)
{
for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++)
{
SolverContactFriction& f = frictions[i];
Ps::prefetchLine(&frictions[i], 128);
const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);
const Vec3V rbXt0 = Vec3V_From_Vec4V(f.rbXnXYZ_biasW);
const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);
const FloatV targetVel = FLoad(f.targetVel);
const FloatV normalImpulse = FLoad(appliedImpulse[j]);
const FloatV maxFriction = FMul(staticFriction, normalImpulse);
const FloatV nMaxFriction = FNeg(maxFriction);
//Compute the normal velocity of the constraint.
const FloatV t0Vel1 = V3Dot(t0, linVel0);
const FloatV t0Vel2 = V3Dot(raXt0, angState0);
const FloatV t0Vel3 = V3Dot(t0, linVel1);
const FloatV t0Vel4 = V3Dot(rbXt0, angState1);
const FloatV t0Vel = FSub(FAdd(t0Vel1, t0Vel2), FAdd(t0Vel3, t0Vel4));
const Vec3V delLinVel0 = V3Scale(t0, invMass0D0);
const Vec3V delLinVel1 = V3Scale(t0, invMass1D1);
// still lots to do here: using loop pipelining we can interweave this code with the
// above - the code here has a lot of stalls that we would thereby eliminate
const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce);
FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp);
newForce = FClamp(newForce, nMaxFriction, maxFriction);
FloatV deltaF = FSub(newForce, appliedForce);
linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
angState0 = V3ScaleAdd(raXt0, FMul(deltaF, angD0), angState0);
angState1 = V3NegScaleSub(rbXt0, FMul(deltaF, angD1), angState1);
f.setAppliedForce(newForce);
}
}
}
// Write back
V3StoreA(linVel0, b0.linearVelocity);
V3StoreA(linVel1, b1.linearVelocity);
V3StoreA(angState0, b0.angularState);
V3StoreA(angState1, b1.angularState);
PX_ASSERT(currPtr == last);
}
void solveContactCoulomb_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
PxSolverBody& b0 = *desc.bodyA;
Vec3V linVel0 = V3LoadA(b0.linearVelocity);
Vec3V angState0 = V3LoadA(b0.angularState);
SolverContactCoulombHeader* firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint);
const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc);
//hopefully pointer aliasing doesn't bite.
PxU8* PX_RESTRICT currPtr = desc.constraint;
//const FloatV zero = FZero();
while(currPtr < last)
{
SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr);
currPtr += sizeof(SolverContactCoulombHeader);
const PxU32 numNormalConstr = hdr->numNormalConstr;
SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
Ps::prefetchLine(contacts);
currPtr += numNormalConstr * sizeof(SolverContactPoint);
PxF32* appliedImpulse = reinterpret_cast<PxF32*> ((reinterpret_cast<PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader));
Ps::prefetchLine(appliedImpulse);
const Vec3V normal = hdr->getNormal();
const FloatV invMassDom0 = FLoad(hdr->dominance0);
const FloatV angD0 = FLoad(hdr->angDom0);
solveStaticContacts(contacts, numNormalConstr, normal, invMassDom0,
angD0, linVel0, angState0, appliedImpulse);
}
// Write back
V3StoreA(linVel0, b0.linearVelocity);
V3StoreA(angState0, b0.angularState);
PX_ASSERT(currPtr == last);
}
void solveFriction_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
PxSolverBody& b0 = *desc.bodyA;
Vec3V linVel0 = V3LoadA(b0.linearVelocity);
Vec3V angState0 = V3LoadA(b0.angularState);
PxU8* PX_RESTRICT currPtr = desc.constraint;
const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc);
while(currPtr < last)
{
const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr);
const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
const PxU32 numNormalConstr = frictionHeader->numNormalConstr;
const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr;
currPtr +=sizeof(SolverFrictionHeader);
PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr);
currPtr +=frictionHeader->getAppliedForcePaddingSize();
SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
currPtr += numFrictionConstr * sizeof(SolverContactFriction);
const FloatV invMass0 = FLoad(frictionHeader->invMass0D0);
const FloatV angD0 = FLoad(frictionHeader->angDom0);
//const FloatV angD1 = FLoad(frictionHeader->angDom1);
const FloatV staticFriction = frictionHeader->getStaticFriction();
for(PxU32 i=0, j = 0;i<numFrictionConstr;j++)
{
for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++)
{
SolverContactFriction& f = frictions[i];
Ps::prefetchLine(&frictions[i+1]);
const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);
const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);
const FloatV targetVel = FLoad(f.targetVel);
//const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce();
const FloatV normalImpulse = FLoad(appliedImpulse[j]);
const FloatV maxFriction = FMul(staticFriction, normalImpulse);
const FloatV nMaxFriction = FNeg(maxFriction);
//Compute the normal velocity of the constraint.
const FloatV t0Vel1 = V3Dot(t0, linVel0);
const FloatV t0Vel2 = V3Dot(raXt0, angState0);
const FloatV t0Vel = FAdd(t0Vel1, t0Vel2);
const Vec3V delangState0 = V3Scale(raXt0, angD0);
const Vec3V delLinVel0 = V3Scale(t0, invMass0);
// still lots to do here: using loop pipelining we can interweave this code with the
// above - the code here has a lot of stalls that we would thereby eliminate
const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce);
FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp);
newForce = FClamp(newForce, nMaxFriction, maxFriction);
const FloatV deltaF = FSub(newForce, appliedForce);
linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
angState0 = V3ScaleAdd(delangState0, deltaF, angState0);
f.setAppliedForce(newForce);
}
}
}
// Write back
V3StoreA(linVel0, b0.linearVelocity);
V3StoreA(angState0, b0.angularState);
PX_ASSERT(currPtr == last);
}
void concludeContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
PxU8* PX_RESTRICT cPtr = desc.constraint;
const SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<const SolverContactCoulombHeader*>(cPtr);
PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc);
while(cPtr < last)
{
const SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactCoulombHeader*>(cPtr);
cPtr += sizeof(SolverContactCoulombHeader);
const PxU32 numNormalConstr = hdr->numNormalConstr;
//if(cPtr < last)
//Ps::prefetchLine(cPtr, 512);
Ps::prefetchLine(cPtr,128);
Ps::prefetchLine(cPtr,256);
Ps::prefetchLine(cPtr,384);
const PxU32 pointStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt)
: sizeof(SolverContactPoint);
for(PxU32 i=0;i<numNormalConstr;i++)
{
SolverContactPoint *c = reinterpret_cast<SolverContactPoint*>(cPtr);
cPtr += pointStride;
//c->scaledBias = PxMin(c->scaledBias, 0.f);
c->biasedErr = c->unbiasedErr;
}
}
PX_ASSERT(cPtr == last);
}
void writeBackContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& cache,
PxSolverBodyData& bd0, PxSolverBodyData& bd1)
{
PxReal normalForce = 0.f;
PxU8* PX_RESTRICT cPtr = desc.constraint;
PxReal* PX_RESTRICT vForceWriteback = reinterpret_cast<PxReal*>(desc.writeBack);
const SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<const SolverContactCoulombHeader*>(cPtr);
PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;
const PxU32 pointStride = firstHeader->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt)
: sizeof(SolverContactPoint);
bool hasForceThresholds = false;
while(cPtr < last)
{
const SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactCoulombHeader*>(cPtr);
cPtr += sizeof(SolverContactCoulombHeader);
PxF32* appliedImpulse = reinterpret_cast<PxF32*> (const_cast<PxU8*>((reinterpret_cast<const PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader)));
hasForceThresholds = hdr->flags & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
const PxU32 numNormalConstr = hdr->numNormalConstr;
Ps::prefetchLine(cPtr, 256);
Ps::prefetchLine(cPtr, 384);
if(vForceWriteback!=NULL)
{
for(PxU32 i=0; i<numNormalConstr; i++)
{
PxF32 imp = appliedImpulse[i];
*vForceWriteback = imp;
vForceWriteback++;
normalForce += imp;
}
}
cPtr += numNormalConstr * pointStride;
}
PX_ASSERT(cPtr == last);
if(hasForceThresholds && desc.linkIndexA == PxSolverConstraintDesc::NO_LINK && desc.linkIndexB == PxSolverConstraintDesc::NO_LINK &&
normalForce !=0 && (bd0.reportThreshold < PX_MAX_REAL || bd1.reportThreshold < PX_MAX_REAL))
{
ThresholdStreamElement elt;
elt.normalForce = normalForce;
elt.threshold = PxMin<float>(bd0.reportThreshold, bd1.reportThreshold);
elt.nodeIndexA = IG::NodeIndex(bd0.nodeIndex);
elt.nodeIndexB = IG::NodeIndex(bd1.nodeIndex);
elt.shapeInteraction = (reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint))->shapeInteraction;
Ps::order(elt.nodeIndexA, elt.nodeIndexB);
PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
}
}
void solveFrictionBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveFriction(desc[a], cache);
}
}
void solveFrictionBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveFriction(desc[a], cache);
}
}
void solveFriction_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveFriction_BStatic(desc[a], cache);
}
}
void solveFriction_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveFriction_BStatic(desc[a], cache);
}
}
void solveFriction_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveFriction_BStatic(desc[a], cache);
}
}
void solveContactCoulombBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveContactCoulomb(desc[a], cache);
}
}
void solveContactCoulombConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveContactCoulomb(desc[a], cache);
concludeContactCoulomb(desc[a], cache);
}
}
void solveContactCoulombBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].bodyADataIndex];
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].bodyBDataIndex];
solveContactCoulomb(desc[a], cache);
writeBackContactCoulomb(desc[a], cache, bd0, bd1);
}
if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
{
//Write back to global buffer
PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
{
cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
}
cache.mThresholdStreamIndex = 0;
}
}
void solveContactCoulomb_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveContactCoulomb_BStatic(desc[a], cache);
}
}
void solveContactCoulomb_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveContactCoulomb_BStatic(desc[a], cache);
concludeContactCoulomb(desc[a], cache);
}
}
void solveContactCoulomb_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].bodyADataIndex];
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].bodyBDataIndex];
solveContactCoulomb_BStatic(desc[a], cache);
writeBackContactCoulomb(desc[a], cache, bd0, bd1);
}
if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
{
//Not enough space to write 4 more thresholds back!
//Write back to global buffer
PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
{
cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
}
cache.mThresholdStreamIndex = 0;
}
}
void solveExtContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& cache)
{
//We'll need this.
// const FloatV zero = FZero();
// const FloatV one = FOne();
Vec3V linVel0, angVel0, linVel1, angVel1;
if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
{
linVel0 = V3LoadA(desc.bodyA->linearVelocity);
angVel0 = V3LoadA(desc.bodyA->angularState);
}
else
{
Cm::SpatialVectorV v = desc.articulationA->pxcFsGetVelocity(desc.linkIndexA);
linVel0 = v.linear;
angVel0 = v.angular;
}
if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
{
linVel1 = V3LoadA(desc.bodyB->linearVelocity);
angVel1 = V3LoadA(desc.bodyB->angularState);
}
else
{
Cm::SpatialVectorV v = desc.articulationB->pxcFsGetVelocity(desc.linkIndexB);
linVel1 = v.linear;
angVel1 = v.angular;
}
//const PxU8* PX_RESTRICT last = desc.constraint + desc.constraintLengthOver16*16;
PxU8* PX_RESTRICT currPtr = desc.constraint;
const SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(currPtr);
const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;
//hopefully pointer aliasing doesn't bite.
Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero();
while(currPtr < last)
{
const SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr);
currPtr += sizeof(SolverContactCoulombHeader);
const PxU32 numNormalConstr = hdr->numNormalConstr;
PxF32* appliedImpulse = reinterpret_cast<PxF32*>(const_cast<PxU8*>(((reinterpret_cast<const PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader))));
Ps::prefetchLine(appliedImpulse);
SolverContactPointExt* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointExt*>(currPtr);
Ps::prefetchLine(contacts);
currPtr += numNormalConstr * sizeof(SolverContactPointExt);
Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
const Vec3V normal = hdr->getNormal();
solveExtContacts(contacts, numNormalConstr, normal, linVel0, angVel0, linVel1, angVel1, li0, ai0, li1, ai1, appliedImpulse);
linImpulse0 = V3ScaleAdd(li0, FLoad(hdr->dominance0), linImpulse0);
angImpulse0 = V3ScaleAdd(ai0, FLoad(hdr->angDom0), angImpulse0);
linImpulse1 = V3NegScaleSub(li1, FLoad(hdr->dominance1), linImpulse1);
angImpulse1 = V3NegScaleSub(ai1, FLoad(hdr->angDom1), angImpulse1);
}
if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
{
V3StoreA(linVel0, desc.bodyA->linearVelocity);
V3StoreA(angVel0, desc.bodyA->angularState);
}
else
{
Cm::SpatialVectorF Z[64];
Cm::SpatialVectorF deltaV[64];
desc.articulationA->pxcFsApplyImpulse(desc.linkIndexA, linImpulse0,
angImpulse0, Z, deltaV);
}
if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
{
V3StoreA(linVel1, desc.bodyB->linearVelocity);
V3StoreA(angVel1, desc.bodyB->angularState);
}
else
{
desc.articulationB->pxcFsApplyImpulse(desc.linkIndexB, linImpulse1,
angImpulse1, cache.Z, cache.deltaV);
}
PX_ASSERT(currPtr == last);
}
void solveExtFriction(const PxSolverConstraintDesc& desc, SolverContext& cache)
{
Vec3V linVel0, angVel0, linVel1, angVel1;
if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
{
linVel0 = V3LoadA(desc.bodyA->linearVelocity);
angVel0 = V3LoadA(desc.bodyA->angularState);
}
else
{
Cm::SpatialVectorV v = desc.articulationA->pxcFsGetVelocity(desc.linkIndexA);
linVel0 = v.linear;
angVel0 = v.angular;
}
if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
{
linVel1 = V3LoadA(desc.bodyB->linearVelocity);
angVel1 = V3LoadA(desc.bodyB->angularState);
}
else
{
Cm::SpatialVectorV v = desc.articulationB->pxcFsGetVelocity(desc.linkIndexB);
linVel1 = v.linear;
angVel1 = v.angular;
}
//hopefully pointer aliasing doesn't bite.
PxU8* PX_RESTRICT currPtr = desc.constraint;
const PxU8* PX_RESTRICT last = currPtr + desc.constraintLengthOver16*16;
Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero();
while(currPtr < last)
{
const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr);
currPtr += sizeof(SolverFrictionHeader);
PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr);
currPtr += frictionHeader->getAppliedForcePaddingSize();
SolverContactFrictionExt* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionExt*>(currPtr);
const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
currPtr += numFrictionConstr * sizeof(SolverContactFrictionExt);
const FloatV staticFriction = frictionHeader->getStaticFriction();
Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
PxU32 numNormalConstr = frictionHeader->numNormalConstr;
PxU32 nbFrictionsPerPoint = numFrictionConstr/numNormalConstr;
for(PxU32 i = 0, j = 0; i < numFrictionConstr; j++)
{
for(PxU32 p=0;p<nbFrictionsPerPoint;p++, i++)
{
SolverContactFrictionExt& f = frictions[i];
Ps::prefetchLine(&frictions[i+1]);
const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);
const Vec3V rbXt0 = Vec3V_From_Vec4V(f.rbXnXYZ_biasW);
const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);
const FloatV targetVel = FLoad(f.targetVel);
const FloatV normalImpulse = FLoad(appliedImpulse[j]);//contacts[f.contactIndex].getAppliedForce();
const FloatV maxFriction = FMul(staticFriction, normalImpulse);
const FloatV nMaxFriction = FNeg(maxFriction);
//Compute the normal velocity of the constraint.
Vec3V rVel = V3MulAdd(linVel0, t0, V3Mul(angVel0, raXt0));
rVel = V3Sub(rVel, V3MulAdd(linVel1, t0, V3Mul(angVel1, rbXt0)));
const FloatV t0Vel = FAdd(V3SumElems(rVel), targetVel);
FloatV deltaF = FNeg(FMul(t0Vel, velMultiplier));
FloatV newForce = FAdd(appliedForce, deltaF);
newForce = FClamp(newForce, nMaxFriction, maxFriction);
deltaF = FSub(newForce, appliedForce);
linVel0 = V3ScaleAdd(f.linDeltaVA, deltaF, linVel0);
angVel0 = V3ScaleAdd(f.angDeltaVA, deltaF, angVel0);
linVel1 = V3ScaleAdd(f.linDeltaVB, deltaF, linVel1);
angVel1 = V3ScaleAdd(f.angDeltaVB, deltaF, angVel1);
li0 = V3ScaleAdd(t0, deltaF, li0); ai0 = V3ScaleAdd(raXt0, deltaF, ai0);
li1 = V3ScaleAdd(t0, deltaF, li1); ai1 = V3ScaleAdd(rbXt0, deltaF, ai1);
f.setAppliedForce(newForce);
}
}
linImpulse0 = V3ScaleAdd(li0, FLoad(frictionHeader->invMass0D0), linImpulse0);
angImpulse0 = V3ScaleAdd(ai0, FLoad(frictionHeader->angDom0), angImpulse0);
linImpulse1 = V3NegScaleSub(li1, FLoad(frictionHeader->invMass1D1), linImpulse1);
angImpulse1 = V3NegScaleSub(ai1, FLoad(frictionHeader->angDom1), angImpulse1);
}
if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
{
V3StoreA(linVel0, desc.bodyA->linearVelocity);
V3StoreA(angVel0, desc.bodyA->angularState);
}
else
{
desc.articulationA->pxcFsApplyImpulse(desc.linkIndexA,
linImpulse0, angImpulse0, cache.Z, cache.deltaV);
}
if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
{
V3StoreA(linVel1, desc.bodyB->linearVelocity);
V3StoreA(angVel1, desc.bodyB->angularState);
}
else
{
desc.articulationB->pxcFsApplyImpulse(desc.linkIndexB,
linImpulse1, angImpulse1, cache.Z, cache.deltaV);
}
PX_ASSERT(currPtr == last);
}
void solveExtFrictionBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveExtFriction(desc[a], cache);
}
}
void solveExtFrictionConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveExtFriction(desc[a], cache);
}
}
void solveExtFrictionBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveExtFriction(desc[a], cache);
}
}
void solveConcludeExtContactCoulomb (const PxSolverConstraintDesc& desc, SolverContext& cache)
{
solveExtContactCoulomb(desc, cache);
concludeContactCoulomb(desc, cache);
}
void solveExtContactCoulombBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveExtContactCoulomb(desc[a], cache);
}
}
void solveExtContactCoulombConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
solveExtContactCoulomb(desc[a], cache);
concludeContactCoulomb(desc[a], cache);
}
}
void solveExtContactCoulombBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
{
for(PxU32 a = 0; a < constraintCount; ++a)
{
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyADataIndex];
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyBDataIndex];
solveExtContactCoulomb(desc[a], cache);
writeBackContactCoulomb(desc[a], cache, bd0, bd1);
}
if(cache.mThresholdStreamIndex > 0)
{
//Not enough space to write 4 more thresholds back!
//Write back to global buffer
PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
{
cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
}
cache.mThresholdStreamIndex = 0;
}
}
void solveConcludeContactCoulomb (const PxSolverConstraintDesc& desc, SolverContext& cache)
{
solveContactCoulomb(desc, cache);
concludeContactCoulomb(desc, cache);
}
void solveConcludeContactCoulomb_BStatic (const PxSolverConstraintDesc& desc, SolverContext& cache)
{
solveContactCoulomb_BStatic(desc, cache);
concludeContactCoulomb(desc, cache);
}
}
}

View File

@ -0,0 +1,985 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"
#include "PsFPU.h"
#include "CmPhysXCommon.h"
#include "DySolverBody.h"
#include "DySolverContactPF4.h"
#include "DySolverConstraint1D.h"
#include "DySolverConstraintDesc.h"
#include "DyThresholdTable.h"
#include "DySolverContext.h"
#include "PsUtilities.h"
#include "DyConstraint.h"
#include "PsAtomic.h"
#include "DySolverContact.h"
namespace physx
{
namespace Dy
{
static void solveContactCoulomb4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
{
PxSolverBody& b00 = *desc[0].bodyA;
PxSolverBody& b01 = *desc[0].bodyB;
PxSolverBody& b10 = *desc[1].bodyA;
PxSolverBody& b11 = *desc[1].bodyB;
PxSolverBody& b20 = *desc[2].bodyA;
PxSolverBody& b21 = *desc[2].bodyB;
PxSolverBody& b30 = *desc[3].bodyA;
PxSolverBody& b31 = *desc[3].bodyB;
//We'll need this.
const Vec4V vZero = V4Zero();
Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
Vec4V angState00 = V4LoadA(&b00.angularState.x);
Vec4V angState01 = V4LoadA(&b01.angularState.x);
Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
Vec4V angState10 = V4LoadA(&b10.angularState.x);
Vec4V angState11 = V4LoadA(&b11.angularState.x);
Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
Vec4V angState20 = V4LoadA(&b20.angularState.x);
Vec4V angState21 = V4LoadA(&b21.angularState.x);
Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
Vec4V angState30 = V4LoadA(&b30.angularState.x);
Vec4V angState31 = V4LoadA(&b31.angularState.x);
Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
Vec4V angState1T0, angState1T1, angState1T2, angState1T3;
PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);
//hopefully pointer aliasing doesn't bite.
PxU8* PX_RESTRICT currPtr = desc[0].constraint;
SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr);
const PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset;
//const PxU8* PX_RESTRICT endPtr = desc[0].constraint + getConstraintLength(desc[0]);
//TODO - can I avoid this many tests???
while(currPtr < last)
{
SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr);
Vec4V* appliedForceBuffer = reinterpret_cast<Vec4V*>(currPtr + hdr->frictionOffset + sizeof(SolverFrictionHeader4));
//PX_ASSERT((PxU8*)appliedForceBuffer < endPtr);
currPtr = reinterpret_cast<PxU8*>(hdr + 1);
const PxU32 numNormalConstr = hdr->numNormalConstr;
SolverContact4Dynamic* PX_RESTRICT contacts = reinterpret_cast<SolverContact4Dynamic*>(currPtr);
//const Vec4V dominance1 = V4Neg(__dominance1);
currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr);
const Vec4V invMass0D0 = hdr->invMassADom;
const Vec4V invMass1D1 = hdr->invMassBDom;
const Vec4V angD0 = hdr->angD0;
const Vec4V angD1 = hdr->angD1;
const Vec4V normalT0 = hdr->normalX;
const Vec4V normalT1 = hdr->normalY;
const Vec4V normalT2 = hdr->normalZ;
const Vec4V __normalVel1 = V4Mul(linVel0T0, normalT0);
const Vec4V __normalVel3 = V4Mul(linVel1T0, normalT0);
const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalT1, __normalVel1);
const Vec4V _normalVel3 = V4MulAdd(linVel1T1, normalT1, __normalVel3);
Vec4V normalVel1 = V4MulAdd(linVel0T2, normalT2, _normalVel1);
Vec4V normalVel3 = V4MulAdd(linVel1T2, normalT2, _normalVel3);
Vec4V accumDeltaF = vZero;
for(PxU32 i=0;i<numNormalConstr;i++)
{
SolverContact4Dynamic& c = contacts[i];
Ps::prefetchLine((&contacts[i+1]));
Ps::prefetchLine((&contacts[i+1]), 128);
Ps::prefetchLine((&contacts[i+1]), 256);
Ps::prefetchLine((&contacts[i+1]), 384);
const Vec4V appliedForce = c.appliedForce;
const Vec4V velMultiplier = c.velMultiplier;
const Vec4V targetVel = c.targetVelocity;
const Vec4V scaledBias = c.scaledBias;
const Vec4V maxImpulse = c.maxImpulse;
const Vec4V raXnT0 = c.raXnX;
const Vec4V raXnT1 = c.raXnY;
const Vec4V raXnT2 = c.raXnZ;
const Vec4V rbXnT0 = c.rbXnX;
const Vec4V rbXnT1 = c.rbXnY;
const Vec4V rbXnT2 = c.rbXnZ;
const Vec4V __normalVel2 = V4Mul(raXnT0, angState0T0);
const Vec4V __normalVel4 = V4Mul(rbXnT0, angState1T0);
const Vec4V _normalVel2 = V4MulAdd(raXnT1, angState0T1, __normalVel2);
const Vec4V _normalVel4 = V4MulAdd(rbXnT1, angState1T1, __normalVel4);
const Vec4V normalVel2 = V4MulAdd(raXnT2, angState0T2, _normalVel2);
const Vec4V normalVel4 = V4MulAdd(rbXnT2, angState1T2, _normalVel4);
const Vec4V biasedErr = V4MulAdd(targetVel, velMultiplier, V4Neg(scaledBias));
//Linear component - normal * invMass_dom
const Vec4V _normalVel(V4Add(normalVel1, normalVel2));
const Vec4V __normalVel(V4Add(normalVel3, normalVel4));
const Vec4V normalVel = V4Sub(_normalVel, __normalVel );
const Vec4V _deltaF = V4NegMulSub(normalVel, velMultiplier, biasedErr);
const Vec4V nAppliedForce = V4Neg(appliedForce);
const Vec4V _deltaF2 = V4Max(_deltaF, nAppliedForce);
const Vec4V _newAppliedForce(V4Add(appliedForce, _deltaF2));
const Vec4V newAppliedForce = V4Min(_newAppliedForce, maxImpulse);
const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
normalVel1 = V4MulAdd(invMass0D0, deltaF, normalVel1);
normalVel3 = V4NegMulSub(invMass1D1, deltaF, normalVel3);
accumDeltaF = V4Add(deltaF, accumDeltaF);
const Vec4V deltaFAng0 = V4Mul(angD0, deltaF);
const Vec4V deltaFAng1 = V4Mul(angD1, deltaF);
angState0T0 = V4MulAdd(raXnT0, deltaFAng0, angState0T0);
angState1T0 = V4NegMulSub(rbXnT0, deltaFAng1, angState1T0);
angState0T1 = V4MulAdd(raXnT1, deltaFAng0, angState0T1);
angState1T1 = V4NegMulSub(rbXnT1, deltaFAng1, angState1T1);
angState0T2 = V4MulAdd(raXnT2, deltaFAng0, angState0T2);
angState1T2 = V4NegMulSub(rbXnT2, deltaFAng1, angState1T2);
c.appliedForce = newAppliedForce;
appliedForceBuffer[i] = newAppliedForce;
}
const Vec4V accumDeltaF0 = V4Mul(accumDeltaF, invMass0D0);
const Vec4V accumDeltaF1 = V4Mul(accumDeltaF, invMass1D1);
linVel0T0 = V4MulAdd(normalT0, accumDeltaF0, linVel0T0);
linVel1T0 = V4NegMulSub(normalT0, accumDeltaF1, linVel1T0);
linVel0T1 = V4MulAdd(normalT1, accumDeltaF0, linVel0T1);
linVel1T1 = V4NegMulSub(normalT1, accumDeltaF1, linVel1T1);
linVel0T2 = V4MulAdd(normalT2, accumDeltaF0, linVel0T2);
linVel1T2 = V4NegMulSub(normalT2, accumDeltaF1, linVel1T2);
}
PX_ASSERT(currPtr == last);
//KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
//store the bodies' progress counters.
PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);
// Write back
V4StoreA(linVel00, &b00.linearVelocity.x);
V4StoreA(linVel10, &b10.linearVelocity.x);
V4StoreA(linVel20, &b20.linearVelocity.x);
V4StoreA(linVel30, &b30.linearVelocity.x);
V4StoreA(linVel01, &b01.linearVelocity.x);
V4StoreA(linVel11, &b11.linearVelocity.x);
V4StoreA(linVel21, &b21.linearVelocity.x);
V4StoreA(linVel31, &b31.linearVelocity.x);
V4StoreA(angState00, &b00.angularState.x);
V4StoreA(angState10, &b10.angularState.x);
V4StoreA(angState20, &b20.angularState.x);
V4StoreA(angState30, &b30.angularState.x);
V4StoreA(angState01, &b01.angularState.x);
V4StoreA(angState11, &b11.angularState.x);
V4StoreA(angState21, &b21.angularState.x);
V4StoreA(angState31, &b31.angularState.x);
}
static void solveContactCoulomb4_StaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
{
PxSolverBody& b00 = *desc[0].bodyA;
PxSolverBody& b10 = *desc[1].bodyA;
PxSolverBody& b20 = *desc[2].bodyA;
PxSolverBody& b30 = *desc[3].bodyA;
//We'll need this.
const Vec4V vZero = V4Zero();
Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
Vec4V angState00 = V4LoadA(&b00.angularState.x);
Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
Vec4V angState10 = V4LoadA(&b10.angularState.x);
Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
Vec4V angState20 = V4LoadA(&b20.angularState.x);
Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
Vec4V angState30 = V4LoadA(&b30.angularState.x);
Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
//hopefully pointer aliasing doesn't bite.
PxU8* PX_RESTRICT currPtr = desc[0].constraint;
SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr);
const PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset;
//TODO - can I avoid this many tests???
while(currPtr < last)
{
SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr);
Vec4V* appliedForceBuffer = reinterpret_cast<Vec4V*>(currPtr + hdr->frictionOffset + sizeof(SolverFrictionHeader4));
currPtr = reinterpret_cast<PxU8*>(hdr + 1);
const PxU32 numNormalConstr = hdr->numNormalConstr;
SolverContact4Base* PX_RESTRICT contacts = reinterpret_cast<SolverContact4Base*>(currPtr);
currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr);
const Vec4V invMass0D0 = hdr->invMassADom;
const Vec4V angD0 = hdr->angD0;
const Vec4V normalT0 = hdr->normalX;
const Vec4V normalT1 = hdr->normalY;
const Vec4V normalT2 = hdr->normalZ;
const Vec4V __normalVel1 = V4Mul(linVel0T0, normalT0);
const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalT1, __normalVel1);
Vec4V normalVel1 = V4MulAdd(linVel0T2, normalT2, _normalVel1);
Vec4V accumDeltaF = vZero;
for(PxU32 i=0;i<numNormalConstr;i++)
{
SolverContact4Base& c = contacts[i];
Ps::prefetchLine((&contacts[i+1]));
Ps::prefetchLine((&contacts[i+1]), 128);
Ps::prefetchLine((&contacts[i+1]), 256);
const Vec4V appliedForce = c.appliedForce;
const Vec4V velMultiplier = c.velMultiplier;
const Vec4V targetVel = c.targetVelocity;
const Vec4V scaledBias = c.scaledBias;
const Vec4V maxImpulse = c.maxImpulse;
const Vec4V raXnT0 = c.raXnX;
const Vec4V raXnT1 = c.raXnY;
const Vec4V raXnT2 = c.raXnZ;
const Vec4V __normalVel2 = V4Mul(raXnT0, angState0T0);
const Vec4V _normalVel2 = V4MulAdd(raXnT1, angState0T1, __normalVel2);
const Vec4V normalVel2 = V4MulAdd(raXnT2, angState0T2, _normalVel2);
const Vec4V biasedErr = V4MulAdd(targetVel, velMultiplier, V4Neg(scaledBias));
//Linear component - normal * invMass_dom
const Vec4V normalVel(V4Add(normalVel1, normalVel2));
const Vec4V _deltaF = V4NegMulSub(normalVel, velMultiplier, biasedErr);
const Vec4V nAppliedForce = V4Neg(appliedForce);
const Vec4V _deltaF2 = V4Max(_deltaF, nAppliedForce);
const Vec4V _newAppliedForce(V4Add(appliedForce, _deltaF2));
const Vec4V newAppliedForce = V4Min(_newAppliedForce, maxImpulse);
const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
const Vec4V deltaAngF = V4Mul(deltaF, angD0);
normalVel1 = V4MulAdd(invMass0D0, deltaF, normalVel1);
accumDeltaF = V4Add(deltaF, accumDeltaF);
angState0T0 = V4MulAdd(raXnT0, deltaAngF, angState0T0);
angState0T1 = V4MulAdd(raXnT1, deltaAngF, angState0T1);
angState0T2 = V4MulAdd(raXnT2, deltaAngF, angState0T2);
c.appliedForce = newAppliedForce;
appliedForceBuffer[i] = newAppliedForce;
}
const Vec4V scaledAccumDeltaF = V4Mul(accumDeltaF, invMass0D0);
linVel0T0 = V4MulAdd(normalT0, scaledAccumDeltaF, linVel0T0);
linVel0T1 = V4MulAdd(normalT1, scaledAccumDeltaF, linVel0T1);
linVel0T2 = V4MulAdd(normalT2, scaledAccumDeltaF, linVel0T2);
}
PX_ASSERT(currPtr == last);
//KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
//store the bodies' progress counters.
PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
// Write back
// Write back
V4StoreA(linVel00, &b00.linearVelocity.x);
V4StoreA(linVel10, &b10.linearVelocity.x);
V4StoreA(linVel20, &b20.linearVelocity.x);
V4StoreA(linVel30, &b30.linearVelocity.x);
V4StoreA(angState00, &b00.angularState.x);
V4StoreA(angState10, &b10.angularState.x);
V4StoreA(angState20, &b20.angularState.x);
V4StoreA(angState30, &b30.angularState.x);
}
static void solveFriction4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
{
PxSolverBody& b00 = *desc[0].bodyA;
PxSolverBody& b01 = *desc[0].bodyB;
PxSolverBody& b10 = *desc[1].bodyA;
PxSolverBody& b11 = *desc[1].bodyB;
PxSolverBody& b20 = *desc[2].bodyA;
PxSolverBody& b21 = *desc[2].bodyB;
PxSolverBody& b30 = *desc[3].bodyA;
PxSolverBody& b31 = *desc[3].bodyB;
Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
Vec4V angState00 = V4LoadA(&b00.angularState.x);
Vec4V angState01 = V4LoadA(&b01.angularState.x);
Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
Vec4V angState10 = V4LoadA(&b10.angularState.x);
Vec4V angState11 = V4LoadA(&b11.angularState.x);
Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
Vec4V angState20 = V4LoadA(&b20.angularState.x);
Vec4V angState21 = V4LoadA(&b21.angularState.x);
Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
Vec4V angState30 = V4LoadA(&b30.angularState.x);
Vec4V angState31 = V4LoadA(&b31.angularState.x);
Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
Vec4V angState1T0, angState1T1, angState1T2, angState1T3;
PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);
PxU8* PX_RESTRICT currPtr = desc[0].constraint;
PxU8* PX_RESTRICT endPtr = desc[0].constraint + getConstraintLength(desc[0]);
while(currPtr < endPtr)
{
SolverFrictionHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverFrictionHeader4*>(currPtr);
currPtr = reinterpret_cast<PxU8*>(hdr + 1);
Vec4V* appliedImpulses = reinterpret_cast<Vec4V*>(currPtr);
currPtr += hdr->numNormalConstr * sizeof(Vec4V);
Ps::prefetchLine(currPtr, 128);
Ps::prefetchLine(currPtr,256);
Ps::prefetchLine(currPtr,384);
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
SolverFriction4Dynamic* PX_RESTRICT frictions = reinterpret_cast<SolverFriction4Dynamic*>(currPtr);
currPtr = reinterpret_cast<PxU8*>(frictions + hdr->numFrictionConstr);
const PxU32 maxFrictionConstr = numFrictionConstr;
const Vec4V staticFric = hdr->staticFriction;
const Vec4V invMass0D0 = hdr->invMassADom;
const Vec4V invMass1D1 = hdr->invMassBDom;
const Vec4V angD0 = hdr->angD0;
const Vec4V angD1 = hdr->angD1;
for(PxU32 i=0;i<maxFrictionConstr;i++)
{
SolverFriction4Dynamic& f = frictions[i];
Ps::prefetchLine((&f)+1);
Ps::prefetchLine((&f)+1,128);
Ps::prefetchLine((&f)+1,256);
Ps::prefetchLine((&f)+1,384);
const Vec4V appliedImpulse = appliedImpulses[i>>hdr->frictionPerContact];
const Vec4V maxFriction = V4Mul(staticFric, appliedImpulse);
const Vec4V nMaxFriction = V4Neg(maxFriction);
const Vec4V normalX = f.normalX;
const Vec4V normalY = f.normalY;
const Vec4V normalZ = f.normalZ;
const Vec4V raXnX = f.raXnX;
const Vec4V raXnY = f.raXnY;
const Vec4V raXnZ = f.raXnZ;
const Vec4V rbXnX = f.rbXnX;
const Vec4V rbXnY = f.rbXnY;
const Vec4V rbXnZ = f.rbXnZ;
const Vec4V appliedForce(f.appliedForce);
const Vec4V velMultiplier(f.velMultiplier);
const Vec4V targetVel(f.targetVelocity);
//4 x 4 Dot3 products encoded as 8 M44 transposes, 4 MulV and 8 MulAdd ops
const Vec4V __normalVel1 = V4Mul(linVel0T0, normalX);
const Vec4V __normalVel2 = V4Mul(raXnX, angState0T0);
const Vec4V __normalVel3 = V4Mul(linVel1T0, normalX);
const Vec4V __normalVel4 = V4Mul(rbXnX, angState1T0);
const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalY, __normalVel1);
const Vec4V _normalVel2 = V4MulAdd(raXnY, angState0T1, __normalVel2);
const Vec4V _normalVel3 = V4MulAdd(linVel1T1, normalY, __normalVel3);
const Vec4V _normalVel4 = V4MulAdd(rbXnY, angState1T1, __normalVel4);
const Vec4V normalVel1 = V4MulAdd(linVel0T2, normalZ, _normalVel1);
const Vec4V normalVel2 = V4MulAdd(raXnZ, angState0T2, _normalVel2);
const Vec4V normalVel3 = V4MulAdd(linVel1T2, normalZ, _normalVel3);
const Vec4V normalVel4 = V4MulAdd(rbXnZ, angState1T2, _normalVel4);
const Vec4V _normalVel = V4Add(normalVel1, normalVel2);
const Vec4V __normalVel = V4Add(normalVel3, normalVel4);
const Vec4V normalVel = V4Sub(_normalVel, __normalVel );
const Vec4V tmp = V4NegMulSub(targetVel, velMultiplier, appliedForce);
Vec4V newAppliedForce = V4MulAdd(normalVel, velMultiplier, tmp);
newAppliedForce = V4Clamp(newAppliedForce,nMaxFriction, maxFriction);
const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
const Vec4V deltaLinF0 = V4Mul(invMass0D0, deltaF);
const Vec4V deltaLinF1 = V4Mul(invMass1D1, deltaF);
const Vec4V deltaAngF0 = V4Mul(angD0, deltaF);
const Vec4V deltaAngF1 = V4Mul(angD1, deltaF);
linVel0T0 = V4MulAdd(normalX, deltaLinF0, linVel0T0);
linVel1T0 = V4NegMulSub(normalX, deltaLinF1, linVel1T0);
angState0T0 = V4MulAdd(raXnX, deltaAngF0, angState0T0);
angState1T0 = V4NegMulSub(rbXnX, deltaAngF1, angState1T0);
linVel0T1 = V4MulAdd(normalY, deltaLinF0, linVel0T1);
linVel1T1 = V4NegMulSub(normalY, deltaLinF1, linVel1T1);
angState0T1 = V4MulAdd(raXnY, deltaAngF0, angState0T1);
angState1T1 = V4NegMulSub(rbXnY, deltaAngF1, angState1T1);
linVel0T2 = V4MulAdd(normalZ, deltaLinF0, linVel0T2);
linVel1T2 = V4NegMulSub(normalZ, deltaLinF1, linVel1T2);
angState0T2 = V4MulAdd(raXnZ, deltaAngF0, angState0T2);
angState1T2 = V4NegMulSub(rbXnZ, deltaAngF1, angState1T2);
f.appliedForce = newAppliedForce;
}
}
PX_ASSERT(currPtr == endPtr);
//KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
//store the bodies' progress counters.
PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);
// Write back
// Write back
V4StoreA(linVel00, &b00.linearVelocity.x);
V4StoreA(linVel10, &b10.linearVelocity.x);
V4StoreA(linVel20, &b20.linearVelocity.x);
V4StoreA(linVel30, &b30.linearVelocity.x);
V4StoreA(linVel01, &b01.linearVelocity.x);
V4StoreA(linVel11, &b11.linearVelocity.x);
V4StoreA(linVel21, &b21.linearVelocity.x);
V4StoreA(linVel31, &b31.linearVelocity.x);
V4StoreA(angState00, &b00.angularState.x);
V4StoreA(angState10, &b10.angularState.x);
V4StoreA(angState20, &b20.angularState.x);
V4StoreA(angState30, &b30.angularState.x);
V4StoreA(angState01, &b01.angularState.x);
V4StoreA(angState11, &b11.angularState.x);
V4StoreA(angState21, &b21.angularState.x);
V4StoreA(angState31, &b31.angularState.x);
}
static void solveFriction4_StaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
{
PxSolverBody& b00 = *desc[0].bodyA;
PxSolverBody& b10 = *desc[1].bodyA;
PxSolverBody& b20 = *desc[2].bodyA;
PxSolverBody& b30 = *desc[3].bodyA;
Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
Vec4V angState00 = V4LoadA(&b00.angularState.x);
Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
Vec4V angState10 = V4LoadA(&b10.angularState.x);
Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
Vec4V angState20 = V4LoadA(&b20.angularState.x);
Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
Vec4V angState30 = V4LoadA(&b30.angularState.x);
Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
PxU8* PX_RESTRICT currPtr = desc[0].constraint;
PxU8* PX_RESTRICT endPtr = desc[0].constraint + getConstraintLength(desc[0]);
while(currPtr < endPtr)
{
SolverFrictionHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverFrictionHeader4*>(currPtr);
currPtr = reinterpret_cast<PxU8*>(hdr + 1);
Vec4V* appliedImpulses = reinterpret_cast<Vec4V*>(currPtr);
currPtr += hdr->numNormalConstr * sizeof(Vec4V);
Ps::prefetchLine(currPtr, 128);
Ps::prefetchLine(currPtr,256);
Ps::prefetchLine(currPtr,384);
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
SolverFriction4Base* PX_RESTRICT frictions = reinterpret_cast<SolverFriction4Base*>(currPtr);
currPtr = reinterpret_cast<PxU8*>(frictions + hdr->numFrictionConstr);
const PxU32 maxFrictionConstr = numFrictionConstr;
const Vec4V staticFric = hdr->staticFriction;
const Vec4V invMass0D0 = hdr->invMassADom;
const Vec4V angD0 = hdr->angD0;
for(PxU32 i=0;i<maxFrictionConstr;i++)
{
SolverFriction4Base& f = frictions[i];
Ps::prefetchLine((&f)+1);
Ps::prefetchLine((&f)+1,128);
Ps::prefetchLine((&f)+1,256);
const Vec4V appliedImpulse = appliedImpulses[i>>hdr->frictionPerContact];
const Vec4V maxFriction = V4Mul(staticFric, appliedImpulse);
const Vec4V nMaxFriction = V4Neg(maxFriction);
const Vec4V normalX = f.normalX;
const Vec4V normalY = f.normalY;
const Vec4V normalZ = f.normalZ;
const Vec4V raXnX = f.raXnX;
const Vec4V raXnY = f.raXnY;
const Vec4V raXnZ = f.raXnZ;
const Vec4V appliedForce(f.appliedForce);
const Vec4V velMultiplier(f.velMultiplier);
const Vec4V targetVel(f.targetVelocity);
//4 x 4 Dot3 products encoded as 8 M44 transposes, 4 MulV and 8 MulAdd ops
const Vec4V __normalVel1 = V4Mul(linVel0T0, normalX);
const Vec4V __normalVel2 = V4Mul(raXnX, angState0T0);
const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalY, __normalVel1);
const Vec4V _normalVel2 = V4MulAdd(raXnY, angState0T1, __normalVel2);
const Vec4V normalVel1 = V4MulAdd(linVel0T2, normalZ, _normalVel1);
const Vec4V normalVel2 = V4MulAdd(raXnZ, angState0T2, _normalVel2);
const Vec4V delLinVel00 = V4Mul(normalX, invMass0D0);
const Vec4V delLinVel10 = V4Mul(normalY, invMass0D0);
const Vec4V normalVel = V4Add(normalVel1, normalVel2);
const Vec4V delLinVel20 = V4Mul(normalZ, invMass0D0);
const Vec4V tmp = V4NegMulSub(targetVel, velMultiplier, appliedForce);
Vec4V newAppliedForce = V4MulAdd(normalVel, velMultiplier, tmp);
newAppliedForce = V4Clamp(newAppliedForce,nMaxFriction, maxFriction);
const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
const Vec4V deltaAngF0 = V4Mul(angD0, deltaF);
linVel0T0 = V4MulAdd(delLinVel00, deltaF, linVel0T0);
angState0T0 = V4MulAdd(raXnX, deltaAngF0, angState0T0);
linVel0T1 = V4MulAdd(delLinVel10, deltaF, linVel0T1);
angState0T1 = V4MulAdd(raXnY, deltaAngF0, angState0T1);
linVel0T2 = V4MulAdd(delLinVel20, deltaF, linVel0T2);
angState0T2 = V4MulAdd(raXnZ, deltaAngF0, angState0T2);
f.appliedForce = newAppliedForce;
}
}
PX_ASSERT(currPtr == endPtr);
//KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
//store the bodies' progress counters.
PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
// Write back
// Write back
V4StoreA(linVel00, &b00.linearVelocity.x);
V4StoreA(linVel10, &b10.linearVelocity.x);
V4StoreA(linVel20, &b20.linearVelocity.x);
V4StoreA(linVel30, &b30.linearVelocity.x);
V4StoreA(angState00, &b00.angularState.x);
V4StoreA(angState10, &b10.angularState.x);
V4StoreA(angState20, &b20.angularState.x);
V4StoreA(angState30, &b30.angularState.x);
}
static void concludeContactCoulomb4(const PxSolverConstraintDesc* desc, SolverContext& /*cache*/)
{
PxU8* PX_RESTRICT cPtr = desc[0].constraint;
const Vec4V zero = V4Zero();
const SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<const SolverContactCoulombHeader4*>(cPtr);
PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset;
PxU32 pointStride = firstHeader->type == DY_SC_TYPE_BLOCK_RB_CONTACT ? sizeof(SolverContact4Dynamic) : sizeof(SolverContact4Base);
while(cPtr < last)
{
const SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<const SolverContactCoulombHeader4*>(cPtr);
cPtr += sizeof(SolverContactCoulombHeader4);
const PxU32 numNormalConstr = hdr->numNormalConstr;
//if(cPtr < last)
//Ps::prefetchLine(cPtr, 512);
Ps::prefetchLine(cPtr,128);
Ps::prefetchLine(cPtr,256);
Ps::prefetchLine(cPtr,384);
for(PxU32 i=0;i<numNormalConstr;i++)
{
SolverContact4Base *c = reinterpret_cast<SolverContact4Base*>(cPtr);
cPtr += pointStride;
c->scaledBias = V4Max(c->scaledBias, zero);
}
}
PX_ASSERT(cPtr == last);
}
void writeBackContactCoulomb4(const PxSolverConstraintDesc* desc, SolverContext& cache,
const PxSolverBodyData** PX_RESTRICT bd0, const PxSolverBodyData** PX_RESTRICT bd1)
{
Vec4V normalForceV = V4Zero();
PxU8* PX_RESTRICT cPtr = desc[0].constraint;
PxReal* PX_RESTRICT vForceWriteback0 = reinterpret_cast<PxReal*>(desc[0].writeBack);
PxReal* PX_RESTRICT vForceWriteback1 = reinterpret_cast<PxReal*>(desc[1].writeBack);
PxReal* PX_RESTRICT vForceWriteback2 = reinterpret_cast<PxReal*>(desc[2].writeBack);
PxReal* PX_RESTRICT vForceWriteback3 = reinterpret_cast<PxReal*>(desc[3].writeBack);
const SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<const SolverContactCoulombHeader4*>(cPtr);
PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset;
const PxU32 pointStride = firstHeader->type == DY_SC_TYPE_BLOCK_RB_CONTACT ? sizeof(SolverContact4Dynamic)
: sizeof(SolverContact4Base);
bool writeBackThresholds[4] = {false, false, false, false};
while(cPtr < last)
{
const SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<const SolverContactCoulombHeader4*>(cPtr);
cPtr += sizeof(SolverContactCoulombHeader4);
writeBackThresholds[0] = hdr->flags[0] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
writeBackThresholds[1] = hdr->flags[1] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
writeBackThresholds[2] = hdr->flags[2] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
writeBackThresholds[3] = hdr->flags[3] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
const PxU32 numNormalConstr = hdr->numNormalConstr;
Ps::prefetchLine(cPtr, 256);
Ps::prefetchLine(cPtr, 384);
for(PxU32 i=0; i<numNormalConstr; i++)
{
SolverContact4Base* c = reinterpret_cast<SolverContact4Base*>(cPtr);
cPtr += pointStride;
const Vec4V appliedForce = c->appliedForce;
if(vForceWriteback0 && i < hdr->numNormalConstr0)
FStore(V4GetX(appliedForce), vForceWriteback0++);
if(vForceWriteback1 && i < hdr->numNormalConstr1)
FStore(V4GetY(appliedForce), vForceWriteback1++);
if(vForceWriteback2 && i < hdr->numNormalConstr2)
FStore(V4GetZ(appliedForce), vForceWriteback2++);
if(vForceWriteback3 && i < hdr->numNormalConstr3)
FStore(V4GetW(appliedForce), vForceWriteback3++);
normalForceV = V4Add(normalForceV, appliedForce);
}
}
PX_ASSERT(cPtr == last);
PX_ALIGN(16, PxReal nf[4]);
V4StoreA(normalForceV, nf);
//all constraint pointer in descs are the same constraint
Sc::ShapeInteraction** shapeInteractions = reinterpret_cast<SolverContactCoulombHeader4*>(desc[0].constraint)->shapeInteraction;
for(PxU32 a = 0; a < 4; ++a)
{
if(writeBackThresholds[a] && desc[a].linkIndexA == PxSolverConstraintDesc::NO_LINK && desc[a].linkIndexB == PxSolverConstraintDesc::NO_LINK &&
nf[a] !=0.f && (bd0[a]->reportThreshold < PX_MAX_REAL || bd1[a]->reportThreshold < PX_MAX_REAL))
{
ThresholdStreamElement elt;
elt.normalForce = nf[a];
elt.threshold = PxMin<float>(bd0[a]->reportThreshold, bd1[a]->reportThreshold);
elt.nodeIndexA = IG::NodeIndex(bd0[a]->nodeIndex);
elt.nodeIndexB = IG::NodeIndex(bd1[a]->nodeIndex);
elt.shapeInteraction = shapeInteractions[a];
Ps::order(elt.nodeIndexA, elt.nodeIndexB);
PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
}
}
}
void solveContactCoulombPreBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveContactCoulomb4_Block(desc, cache);
}
void solveContactCoulombPreBlock_Static(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveContactCoulomb4_StaticBlock(desc, cache);
}
void solveContactCoulombPreBlock_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveContactCoulomb4_Block(desc, cache);
concludeContactCoulomb4(desc, cache);
}
void solveContactCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveContactCoulomb4_StaticBlock(desc, cache);
concludeContactCoulomb4(desc, cache);
}
void solveContactCoulombPreBlock_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveContactCoulomb4_Block(desc, cache);
const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex],
&cache.solverBodyArray[desc[1].bodyADataIndex],
&cache.solverBodyArray[desc[2].bodyADataIndex],
&cache.solverBodyArray[desc[3].bodyADataIndex]};
const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex],
&cache.solverBodyArray[desc[1].bodyBDataIndex],
&cache.solverBodyArray[desc[2].bodyBDataIndex],
&cache.solverBodyArray[desc[3].bodyBDataIndex]};
writeBackContactCoulomb4(desc, cache, bd0, bd1);
if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
{
//Write back to global buffer
PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
{
cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
}
cache.mThresholdStreamIndex = 0;
}
}
void solveContactCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveContactCoulomb4_StaticBlock(desc, cache);
const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex],
&cache.solverBodyArray[desc[1].bodyADataIndex],
&cache.solverBodyArray[desc[2].bodyADataIndex],
&cache.solverBodyArray[desc[3].bodyADataIndex]};
const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex],
&cache.solverBodyArray[desc[1].bodyBDataIndex],
&cache.solverBodyArray[desc[2].bodyBDataIndex],
&cache.solverBodyArray[desc[3].bodyBDataIndex]};
writeBackContactCoulomb4(desc, cache, bd0, bd1);
if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
{
//Write back to global buffer
PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
{
cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
}
cache.mThresholdStreamIndex = 0;
}
}
void solveFrictionCoulombPreBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveFriction4_Block(desc, cache);
}
void solveFrictionCoulombPreBlock_Static(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveFriction4_StaticBlock(desc, cache);
}
void solveFrictionCoulombPreBlock_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveFriction4_Block(desc, cache);
}
void solveFrictionCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveFriction4_StaticBlock(desc, cache);
}
void solveFrictionCoulombPreBlock_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveFriction4_Block(desc, cache);
}
void solveFrictionCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
{
solveFriction4_StaticBlock(desc, cache);
}
}
}

View File

@ -0,0 +1,142 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_SPATIAL_H
#define DY_SPATIAL_H
#include "foundation/PxVec3.h"
#include "foundation/PxTransform.h"
#include "PsMathUtils.h"
#include "CmSpatialVector.h"
namespace physx
{
namespace Dy
{
// translate a motion resolved at position p to the origin
// should have a 'from' frame and a 'to' frame
class SpInertia
{
public:
SpInertia() {}
SpInertia(const PxMat33& ll, const PxMat33& la, const PxMat33& aa): mLL(ll), mLA(la), mAA(aa)
{
}
static SpInertia getZero()
{
return SpInertia(PxMat33(PxZero), PxMat33(PxZero),
PxMat33(PxZero));
}
static SpInertia dyad(const Cm::SpatialVector& column, const Cm::SpatialVector& row)
{
return SpInertia(dyad(column.linear, row.linear),
dyad(column.linear, row.angular),
dyad(column.angular, row.angular));
}
static SpInertia inertia(PxReal mass, const PxVec3& inertia)
{
return SpInertia(PxMat33::createDiagonal(PxVec3(mass,mass,mass)), PxMat33(PxZero),
PxMat33::createDiagonal(inertia));
}
SpInertia operator+(const SpInertia& m) const
{
return SpInertia(mLL+m.mLL, mLA+m.mLA, mAA+m.mAA);
}
SpInertia operator-(const SpInertia& m) const
{
return SpInertia(mLL-m.mLL, mLA-m.mLA, mAA-m.mAA);
}
SpInertia operator*(PxReal r) const
{
return SpInertia(mLL*r, mLA*r, mAA*r);
}
void operator+=(const SpInertia& m)
{
mLL+=m.mLL;
mLA+=m.mLA;
mAA+=m.mAA;
}
void operator-=(const SpInertia& m)
{
mLL-=m.mLL;
mLA-=m.mLA;
mAA-=m.mAA;
}
PX_FORCE_INLINE Cm::SpatialVector operator *(const Cm::SpatialVector& v) const
{
return Cm::SpatialVector(mLL*v.linear +mLA*v.angular,
mLA.transformTranspose(v.linear)+mAA*v.angular);
}
SpInertia operator *(const SpInertia& v) const
{
return SpInertia(mLL*v.mLL + mLA * v.mLA.getTranspose(),
mLL*v.mLA + mLA * v.mAA,
mLA.getTranspose()*v.mLA + mAA * v.mAA);
}
bool isFinite() const
{
return true;
// return mLL.isFinite() && mLA.isFinite() && mAA.isFinite();
}
PxMat33 mLL, mLA; // linear force from angular motion, linear force from linear motion
PxMat33 mAA; // angular force from angular motion, mAL = mLA.transpose()
private:
static PxMat33 dyad(PxVec3 col, PxVec3 row)
{
return PxMat33(col*row.x, col*row.y, col*row.z);
}
};
}
}
#endif //DY_SPATIAL_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,142 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_TGS_CONTACTPREP_H
#define DY_TGS_CONTACTPREP_H
#include "foundation/PxPreprocessor.h"
#include "DySolverConstraintDesc.h"
#include "PxSceneDesc.h"
#include "DySolverContact4.h"
namespace physx
{
struct PxcNpWorkUnit;
class PxsConstraintBlockManager;
class PxcConstraintBlockStream;
struct PxsContactManagerOutput;
class FrictionPatchStreamPair;
struct PxSolverBody;
struct PxSolverBodyData;
struct PxSolverConstraintDesc;
class PxsContactManager;
namespace Dy
{
class ThreadContext;
struct CorrelationBuffer;
bool createFinalizeSolverContactsStep(PxTGSSolverContactDesc& contactDesc,
PxsContactManagerOutput& output,
ThreadContext& threadContext,
const PxReal invDtF32,
const PxReal invTotalDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxConstraintAllocator& constraintAllocator);
bool createFinalizeSolverContactsStep(
PxTGSSolverContactDesc& contactDesc,
CorrelationBuffer& c,
const PxReal invDtF32,
const PxReal invTotalDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxConstraintAllocator& constraintAllocator);
SolverConstraintPrepState::Enum setupSolverConstraintStep4
(PxTGSSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
const PxReal dt, const PxReal totalDt, const PxReal recipdt, const PxReal recipTotalDt, PxU32& totalRows,
PxConstraintAllocator& allocator, PxU32 maxRows,
const PxReal lengthScale);
PxU32 SetupSolverConstraintStep(SolverConstraintShaderPrepDesc& shaderDesc,
PxTGSSolverConstraintPrepDesc& prepDesc,
PxConstraintAllocator& allocator,
const PxReal dt, const PxReal totalDt, const PxReal invdt, const PxReal invTotalDt,
const PxReal lengthScale);
PxU32 setupSolverConstraintStep(
const PxTGSSolverConstraintPrepDesc& prepDesc,
PxConstraintAllocator& allocator,
const PxReal dt, const PxReal totalDt, const PxReal invdt, const PxReal invTotalDt,
const PxReal lengthScale);
SolverConstraintPrepState::Enum setupSolverConstraintStep4
(SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
PxTGSSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
const PxReal dt, const PxReal totalDt, const PxReal recipdt, const PxReal recipTotalDt, PxU32& totalRows,
PxConstraintAllocator& allocator, const PxReal lengthScale);
SolverConstraintPrepState::Enum createFinalizeSolverContacts4Step(
PxsContactManagerOutput** cmOutputs,
ThreadContext& threadContext,
PxTGSSolverContactDesc* blockDescs,
const PxReal invDtF32,
const PxReal invTotalDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator);
SolverConstraintPrepState::Enum createFinalizeSolverContacts4Step(
Dy::CorrelationBuffer& c,
PxTGSSolverContactDesc* blockDescs,
const PxReal invDtF32,
const PxReal invTotalDtF32,
PxReal bounceThresholdF32,
PxReal frictionOffsetThreshold,
PxReal correlationDistance,
PxReal solverOffsetSlop,
PxConstraintAllocator& constraintAllocator);
typedef void(*TGSSolveBlockMethod) (const PxConstraintBatchHeader& hdr, const PxSolverConstraintDesc* desc,
const PxTGSSolverBodyTxInertia* const txInertias, const PxReal minPenetration, const PxReal elapsedTime, SolverContext& cache);
typedef void (*TGSWriteBackMethod) (const PxConstraintBatchHeader& hdr, const PxSolverConstraintDesc* desc, SolverContext* cache);
typedef void (*TGSSolveConcludeMethod) (const PxConstraintBatchHeader& hdr, const PxSolverConstraintDesc* desc,
const PxTGSSolverBodyTxInertia* const txInertias, const PxReal elapsedTime, SolverContext& cache);
extern TGSSolveBlockMethod g_SolveTGSMethods[];
extern TGSWriteBackMethod g_WritebackTGSMethods[];
extern TGSSolveConcludeMethod g_SolveConcludeTGSMethods[];
}
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,517 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_TGS_DYNAMICS_H
#define DY_TGS_DYNAMICS_H
#include "PxvConfig.h"
#include "CmSpatialVector.h"
#include "CmTask.h"
#include "CmPool.h"
#include "PxcThreadCoherentCache.h"
#include "DyThreadContext.h"
#include "PxcConstraintBlockStream.h"
#include "DySolverBody.h"
#include "DyContext.h"
#include "PxsIslandManagerTypes.h"
#include "PxvNphaseImplementationContext.h"
#include "solver/PxSolverDefs.h"
#include "PxsIslandSim.h"
namespace physx
{
namespace Cm
{
class FlushPool;
}
namespace IG
{
class SimpleIslandManager;
struct Edge;
}
class PxsRigidBody;
class PxsStreamedThresholdTable;
struct PxsBodyCore;
struct PxsIslandObjects;
class PxsIslandIndices;
struct PxsIndexedInteraction;
class PxsIslandManager;
struct PxsIndexedConstraint;
struct PxsIndexedContactManager;
class PxsHeapMemoryAllocator;
class PxsMemoryManager;
class PxsDefaultMemoryManager;
namespace Cm
{
class Bitmap;
class SpatialVector;
}
namespace Dy
{
class SolverCore;
struct SolverIslandParams;
struct ArticulationSolverDesc;
class ArticulationV;
class DynamicsContext;
struct SolverContext;
struct SolverIslandObjectsStep
{
PxsRigidBody** bodies;
ArticulationV** articulations;
ArticulationV** articulationOwners;
PxsIndexedContactManager* contactManagers;
const IG::IslandId* islandIds;
PxU32 numIslands;
PxU32* bodyRemapTable;
PxU32* nodeIndexArray;
PxSolverConstraintDesc* constraintDescs;
PxSolverConstraintDesc* orderedConstraintDescs;
PxSolverConstraintDesc* tempConstraintDescs;
PxConstraintBatchHeader* constraintBatchHeaders;
Cm::SpatialVector* motionVelocities;
PxsBodyCore** bodyCoreArray;
PxU32 solverBodyOffset;
SolverIslandObjectsStep() : bodies(NULL), articulations(NULL), articulationOwners(NULL),
contactManagers(NULL), islandIds(NULL), numIslands(0), nodeIndexArray(NULL), constraintDescs(NULL), motionVelocities(NULL), bodyCoreArray(NULL),
solverBodyOffset(0)
{
}
};
struct IslandContextStep
{
//The thread context for this island (set in in the island start task, released in the island end task)
ThreadContext* mThreadContext;
PxsIslandIndices mCounts;
SolverIslandObjectsStep mObjects;
PxU32 mPosIters;
PxU32 mVelIters;
PxU32 mArticulationOffset;
PxReal mStepDt;
PxReal mInvStepDt;
PxI32 mSharedSolverIndex;
PxI32 mSolvedCount;
PxI32 mSharedRigidBodyIndex;
PxI32 mRigidBodyIntegratedCount;
PxI32 mSharedArticulationIndex;
PxI32 mArticulationIntegratedCount;
};
struct SolverIslandObjectsStep;
class SolverBodyVelDataPool : public Ps::Array<PxTGSSolverBodyVel, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxTGSSolverBodyVel> > >
{
PX_NOCOPY(SolverBodyVelDataPool)
public:
SolverBodyVelDataPool() {}
};
class SolverBodyTxInertiaPool : public Ps::Array<PxTGSSolverBodyTxInertia, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxTGSSolverBodyTxInertia> > >
{
PX_NOCOPY(SolverBodyTxInertiaPool)
public:
SolverBodyTxInertiaPool() {}
};
class SolverBodyDataStepPool : public Ps::Array<PxTGSSolverBodyData, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxTGSSolverBodyData> > >
{
PX_NOCOPY(SolverBodyDataStepPool)
public:
SolverBodyDataStepPool() {}
};
class SolverStepConstraintDescPool : public Ps::Array<PxSolverConstraintDesc, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverConstraintDesc> > >
{
PX_NOCOPY(SolverStepConstraintDescPool)
public:
SolverStepConstraintDescPool() { }
};
#if PX_VC
#pragma warning(push)
#pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value.
#endif
class DynamicsTGSContext : public Context
{
PX_NOCOPY(DynamicsTGSContext)
public:
/**PxBaseTask* continuation
\brief Creates a DynamicsContext associated with a PxsContext
\return A pointer to the newly-created DynamicsContext.
*/
static DynamicsTGSContext* create(PxcNpMemBlockPool* memBlockPool,
PxcScratchAllocator& scratchAllocator,
Cm::FlushPool& taskPool,
PxvSimStats& simStats,
PxTaskManager* taskManager,
Ps::VirtualAllocatorCallback* allocator,
PxsMaterialManager* materialManager,
IG::IslandSim* accurateIslandSim,
PxU64 contextID,
const bool enableStabilization,
const bool useEnhancedDeterminism,
const bool useAdaptiveForce,
const PxReal lengthScale
);
/**
\brief Destroys this DynamicsContext
*/
void destroy();
/**
\brief Returns the static world solver body
\return The static world solver body.
*/
//PX_FORCE_INLINE PxSolverBody& getWorldSolverBody() { return mWorldSolverBody; }
PX_FORCE_INLINE Cm::FlushPool& getTaskPool() { return mTaskPool; }
PX_FORCE_INLINE ThresholdStream& getThresholdStream() { return *mThresholdStream; }
PX_FORCE_INLINE PxvSimStats& getSimStats() { return mSimStats; }
#if PX_ENABLE_SIM_STATS
void addThreadStats(const ThreadContext::ThreadSimStats& stats);
#endif
/**
\brief The entry point for the constraint solver.
\param[in] dt The simulation time-step
\param[in] continuation The continuation task for the solver
This method is called after the island generation has completed. Its main responsibilities are:
(1) Reserving the solver body pools
(2) Initializing the static and kinematic solver bodies, which are shared resources between islands.
(3) Construct the solver task chains for each island
Each island is solved as an independent solver task chain in parallel.
*/
virtual void update(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* continuation, PxBaseTask* lostTouchTask,
PxsContactManager** foundPatchManagers, PxU32 nbFoundPatchManagers, PxsContactManager** lostPatchManagers, PxU32 nbLostPatchManagers,
PxU32 maxPatchesPerCM, PxsContactManagerOutputIterator& iter, PxsContactManagerOutput* gpuOutputs, const PxReal dt, const PxVec3& gravity, const PxU32 bitMapWordCounts);
void updatePostKinematic(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* continuation,
PxBaseTask* lostTouchTask);
virtual void processLostPatches(IG::SimpleIslandManager& /*simpleIslandManager*/, PxsContactManager** /*lostPatchManagers*/, PxU32 /*nbLostPatchManagers*/, PxsContactManagerOutputIterator& /*iterator*/){}
virtual void updateBodyCore(PxBaseTask* continuation);
virtual void setSimulationController(PxsSimulationController* simulationController){ mSimulationController = simulationController; }
/**
\brief This method combines the results of several islands, e.g. constructing scene-level simulation statistics and merging together threshold streams for contact notification.
*/
virtual void mergeResults();
virtual void getDataStreamBase(void*& /*contactStreamBase*/, void*& /*patchStreamBase*/, void*& /*forceAndIndicesStreamBase*/){}
/**
\brief Allocates and returns a thread context object.
\return A thread context.
*/
PX_FORCE_INLINE ThreadContext* getThreadContext()
{
return mThreadContextPool.get();
}
/**
\brief Returns a thread context to the thread context pool.
\param[in] context The thread context to return to the thread context pool.
*/
void putThreadContext(ThreadContext* context)
{
mThreadContextPool.put(context);
}
PX_FORCE_INLINE PxU32 getKinematicCount() const { return mKinematicCount; }
PX_FORCE_INLINE PxU64 getContextId() const { return mContextID; }
PX_FORCE_INLINE PxReal getLengthScale() const { return mLengthScale; }
protected:
/**
\brief Constructor for DynamicsContext
*/
DynamicsTGSContext(PxcNpMemBlockPool* memBlockPool,
PxcScratchAllocator& scratchAllocator,
Cm::FlushPool& taskPool,
PxvSimStats& simStats,
PxTaskManager* taskManager,
Ps::VirtualAllocatorCallback* allocator,
PxsMaterialManager* materialManager,
IG::IslandSim* accurateIslandSim,
PxU64 contextID,
const bool enableStabilization,
const bool useEnhancedDeterminism,
const bool useAdaptiveForce,
const PxReal lengthScale
);
/**
\brief Destructor for DynamicsContext
*/
virtual ~DynamicsTGSContext();
// Solver helper-methods
/**
\brief Computes the unconstrained velocity for a given PxsRigidBody
\param[in] atom The PxsRigidBody
*/
void computeUnconstrainedVelocity(PxsRigidBody* atom) const;
/**
\brief fills in a PxSolverConstraintDesc from an indexed interaction
\param[in,out] desc The PxSolverConstraintDesc
\param[in] constraint The PxsIndexedInteraction
*/
void setDescFromIndices(PxSolverConstraintDesc& desc,
const PxsIndexedInteraction& constraint, const PxU32 solverBodyOffset, PxTGSSolverBodyVel* solverBodies);
void setDescFromIndices(PxSolverConstraintDesc& desc, IG::EdgeIndex edgeIndex,
const IG::SimpleIslandManager& islandManager, PxU32* bodyRemapTable, const PxU32 solverBodyOffset, PxTGSSolverBodyVel* solverBodies);
void solveIsland(const SolverIslandObjectsStep& objects,
const PxsIslandIndices& counts,
const PxU32 solverBodyOffset,
IG::SimpleIslandManager& islandManager,
PxU32* bodyRemapTable, PxsMaterialManager* materialManager,
PxsContactManagerOutputIterator& iterator,
PxBaseTask* continuation);
void prepareBodiesAndConstraints(const SolverIslandObjectsStep& objects,
IG::SimpleIslandManager& islandManager,
IslandContextStep& islandContext);
void setupDescs(IslandContextStep& islandContext, const SolverIslandObjectsStep& objects, IG::SimpleIslandManager& mIslandManager, PxU32* mBodyRemapTable, PxU32 mSolverBodyOffset,
PxsContactManagerOutputIterator& outputs);
void preIntegrateBodies(PxsBodyCore** bodyArray, PxsRigidBody** originalBodyArray,
PxTGSSolverBodyVel* solverBodyVelPool, PxTGSSolverBodyTxInertia* solverBodyTxInertia, PxTGSSolverBodyData* solverBodyDataPool2,
PxU32* nodeIndexArray, const PxU32 bodyCount, const PxVec3& gravity, const PxReal dt, PxU32& posIters, PxU32& velIters, PxU32 iteration);
void setupArticulations(IslandContextStep& islandContext, const PxVec3& gravity, const PxReal dt, PxU32& posIters, PxU32& velIters, PxBaseTask* continuation);
PxU32 setupArticulationInternalConstraints(IslandContextStep& islandContext, PxReal dt, PxReal invStepDt, PxSolverConstraintDesc* constraintDescs);
void createSolverConstraints(PxSolverConstraintDesc* contactDescPtr, PxConstraintBatchHeader* headers, const PxU32 nbHeaders,
PxsContactManagerOutputIterator& outputs, Dy::ThreadContext& islandThreadContext, Dy::ThreadContext& threadContext, PxReal stepDt, PxReal totalDt,
PxReal invStepDt, PxU32 nbSubsteps);
void solveConstraintsIteration(const PxSolverConstraintDesc* const contactDescPtr, const PxConstraintBatchHeader* const batchHeaders, const PxU32 nbHeaders, PxReal invStepDt,
const PxTGSSolverBodyTxInertia* const solverTxInertia, const PxReal elapsedTime, const PxReal minPenetration, SolverContext& cache);
void solveConcludeConstraintsIteration(const PxSolverConstraintDesc* const contactDescPtr, const PxConstraintBatchHeader* const batchHeaders, const PxU32 nbHeaders,
PxTGSSolverBodyTxInertia* solverTxInertia, const PxReal elapsedTime, SolverContext& cache);
void parallelSolveConstraints(const PxSolverConstraintDesc* const contactDescPtr, const PxConstraintBatchHeader* const batchHeaders, const PxU32 nbHeaders, PxTGSSolverBodyTxInertia* solverTxInertia,
const PxReal elapsedTime, const PxReal minPenetration, SolverContext& cache);
void writebackConstraintsIteration(const PxConstraintBatchHeader* const hdrs, const PxSolverConstraintDesc* const contactDescPtr, const PxU32 nbHeaders);
void parallelWritebackConstraintsIteration(const PxSolverConstraintDesc* const contactDescPtr, const PxConstraintBatchHeader* const batchHeaders, const PxU32 nbHeaders);
void integrateBodies(const SolverIslandObjectsStep& objects,
const PxU32 count, PxTGSSolverBodyVel* vels,
PxTGSSolverBodyTxInertia* txInertias, const PxTGSSolverBodyData*const bodyDatas, PxReal dt);
void parallelIntegrateBodies(PxTGSSolverBodyVel* vels, PxTGSSolverBodyTxInertia* txInertias,
const PxTGSSolverBodyData* const bodyDatas, const PxU32 count, PxReal dt);
void copyBackBodies(const SolverIslandObjectsStep& objects,
PxTGSSolverBodyVel* vels, PxTGSSolverBodyTxInertia* txInertias,
PxTGSSolverBodyData* solverBodyData, PxReal invDt, IG::IslandSim& islandSim,
PxU32 startIdx, PxU32 endIdx);
void updateArticulations(Dy::ThreadContext& threadContext, const PxU32 startIdx, const PxU32 endIdx, PxReal dt);
void stepArticulations(Dy::ThreadContext& threadContext, const PxsIslandIndices& counts, PxReal dt, PxReal stepInvDt);
void iterativeSolveIsland(const SolverIslandObjectsStep& objects, const PxsIslandIndices& counts, ThreadContext& mThreadContext,
const PxReal stepDt, const PxReal invStepDt, const PxU32 posIters, const PxU32 velIters, SolverContext& cache);
void iterativeSolveIslandParallel(const SolverIslandObjectsStep& objects, const PxsIslandIndices& counts, ThreadContext& mThreadContext,
const PxReal stepDt, const PxU32 posIters, const PxU32 velIters, PxI32* solverCounts, PxI32* integrationCounts, PxI32* articulationIntegrationCounts,
PxI32* solverProgressCount, PxI32* integrationProgressCount, PxI32* articulationProgressCount, PxU32 solverUnrollSize, PxU32 integrationUnrollSize);
void endIsland(ThreadContext& mThreadContext);
void finishSolveIsland(ThreadContext& mThreadContext, const SolverIslandObjectsStep& objects,
const PxsIslandIndices& counts, IG::SimpleIslandManager& islandManager, PxBaseTask* continuation);
/**
\brief Resets the thread contexts
*/
void resetThreadContexts();
/**
\brief Returns the scratch memory allocator.
\return The scratch memory allocator.
*/
PX_FORCE_INLINE PxcScratchAllocator& getScratchAllocator() { return mScratchAllocator; }
//Data
PxTGSSolverBodyVel mWorldSolverBodyVel;
PxTGSSolverBodyTxInertia mWorldSolverBodyTxInertia;
PxTGSSolverBodyData mWorldSolverBodyData2;
/**
\brief A thread context pool
*/
PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool> mThreadContextPool;
/**
\brief Solver constraint desc array
*/
SolverStepConstraintDescPool mSolverConstraintDescPool;
SolverStepConstraintDescPool mOrderedSolverConstraintDescPool;
SolverStepConstraintDescPool mTempSolverConstraintDescPool;
Ps::Array<PxConstraintBatchHeader> mContactConstraintBatchHeaders;
/**
\brief Array of motion velocities for all bodies in the scene.
*/
Ps::Array<Cm::SpatialVector> mMotionVelocityArray;
/**
\brief Array of body core pointers for all bodies in the scene.
*/
Ps::Array<PxsBodyCore*> mBodyCoreArray;
/**
\brief Array of rigid body pointers for all bodies in the scene.
*/
Ps::Array<PxsRigidBody*> mRigidBodyArray;
/**
\brief Array of articulationpointers for all articulations in the scene.
*/
Ps::Array<ArticulationV*> mArticulationArray;
SolverBodyVelDataPool mSolverBodyVelPool;
SolverBodyTxInertiaPool mSolverBodyTxInertiaPool;
SolverBodyDataStepPool mSolverBodyDataPool2;
ThresholdStream* mExceededForceThresholdStream[2]; //this store previous and current exceeded force thresholdStream
Ps::Array<PxU32> mExceededForceThresholdStreamMask;
Ps::Array<PxU32> mSolverBodyRemapTable; //Remaps from the "active island" index to the index within a solver island
Ps::Array<PxU32> mNodeIndexArray; //island node index
Ps::Array<PxsIndexedContactManager> mContactList;
/**
\brief The total number of kinematic bodies in the scene
*/
PxU32 mKinematicCount;
/**
\brief Atomic counter for the number of threshold stream elements.
*/
PxI32 mThresholdStreamOut;
PxsMaterialManager* mMaterialManager;
PxsContactManagerOutputIterator mOutputIterator;
PxReal mLengthScale;
private:
//private:
PxcScratchAllocator& mScratchAllocator;
Cm::FlushPool& mTaskPool;
PxTaskManager* mTaskManager;
PxU32 mCurrentIndex; // this is the index point to the current exceeded force threshold stream
PxU64 mContextID;
friend class SetupDescsTask;
friend class PreIntegrateTask;
friend class SetupArticulationTask;
friend class SetupArticulationInternalConstraintsTask;
friend class SetupSolverConstraintsTask;
friend class SolveIslandTask;
friend class EndIslandTask;
friend class SetupSolverConstraintsSubTask;
friend class ParallelSolveTask;
friend class PreIntegrateParallelTask;
friend class CopyBackTask;
friend class UpdateArticTask;
friend class FinishSolveIslandTask;
};
#if PX_VC
#pragma warning(pop)
#endif
}
}
#endif

View File

@ -0,0 +1,110 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "DyThreadContext.h"
#include "PsBitUtils.h"
namespace physx
{
namespace Dy
{
ThreadContext::ThreadContext(PxcNpMemBlockPool* memBlockPool):
mFrictionPatchStreamPair(*memBlockPool),
mConstraintBlockManager (*memBlockPool),
mConstraintBlockStream (*memBlockPool),
mNumDifferentBodyConstraints(0),
mNumSelfConstraints(0),
mNumStaticConstraints(0),
mConstraintsPerPartition(PX_DEBUG_EXP("ThreadContext::mConstraintsPerPartition")),
mFrictionConstraintsPerPartition(PX_DEBUG_EXP("ThreadContext::frictionsConstraintsPerPartition")),
mPartitionNormalizationBitmap(PX_DEBUG_EXP("ThreadContext::mPartitionNormalizationBitmap")),
frictionConstraintDescArray(PX_DEBUG_EXP("ThreadContext::solverFrictionConstraintArray")),
frictionConstraintBatchHeaders(PX_DEBUG_EXP("ThreadContext::frictionConstraintBatchHeaders")),
compoundConstraints(PX_DEBUG_EXP("ThreadContext::compoundConstraints")),
orderedContactList(PX_DEBUG_EXP("ThreadContext::orderedContactList")),
tempContactList(PX_DEBUG_EXP("ThreadContext::tempContactList")),
sortIndexArray(PX_DEBUG_EXP("ThreadContext::sortIndexArray")),
mConstraintSize (0),
mAxisConstraintCount(0),
mSelfConstraintBlocks(NULL),
mMaxPartitions(0),
mMaxSolverPositionIterations(0),
mMaxSolverVelocityIterations(0),
mMaxArticulationLength(0),
mContactDescPtr(NULL),
mFrictionDescPtr(NULL),
mArticulations(PX_DEBUG_EXP("ThreadContext::articulations"))
{
#if PX_ENABLE_SIM_STATS
mThreadSimStats.clear();
#endif
//Defaulted to have space for 16384 bodies
mPartitionNormalizationBitmap.reserve(512);
//Defaulted to have space for 128 partitions (should be more-than-enough)
mConstraintsPerPartition.reserve(128);
}
void ThreadContext::resizeArrays(PxU32 frictionConstraintDescCount, PxU32 articulationCount)
{
// resize resizes smaller arrays to the exact target size, which can generate a lot of churn
frictionConstraintDescArray.forceSize_Unsafe(0);
frictionConstraintDescArray.reserve((frictionConstraintDescCount+63)&~63);
mArticulations.forceSize_Unsafe(0);
mArticulations.reserve(PxMax<PxU32>(Ps::nextPowerOfTwo(articulationCount), 16));
mArticulations.forceSize_Unsafe(articulationCount);
mContactDescPtr = contactConstraintDescArray;
mFrictionDescPtr = frictionConstraintDescArray.begin();
}
void ThreadContext::reset()
{
// TODO: move these to the PxcNpThreadContext
mFrictionPatchStreamPair.reset();
mConstraintBlockStream.reset();
mContactDescPtr = contactConstraintDescArray;
mFrictionDescPtr = frictionConstraintDescArray.begin();
mAxisConstraintCount = 0;
mMaxSolverPositionIterations = 0;
mMaxSolverVelocityIterations = 0;
mNumDifferentBodyConstraints = 0;
mNumSelfConstraints = 0;
mNumStaticConstraints = 0;
mSelfConstraintBlocks = NULL;
mConstraintSize = 0;
}
}
}

View File

@ -0,0 +1,208 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef DY_THREADCONTEXT_H
#define DY_THREADCONTEXT_H
#include "foundation/PxTransform.h"
#include "geomutils/GuContactBuffer.h"
#include "PxvConfig.h"
#include "PxvDynamics.h"
#include "PxcThreadCoherentCache.h"
#include "PxcConstraintBlockStream.h"
#include "CmBitMap.h"
#include "CmMatrix34.h"
#include "DyThresholdTable.h"
#include "DyArticulation.h"
#include "DyFrictionPatchStreamPair.h"
#include "DySolverConstraintDesc.h"
#include "DyCorrelationBuffer.h"
#include "PsAllocator.h"
namespace physx
{
struct PxsIndexedContactManager;
namespace Dy
{
/*!
Cache information specific to the software implementation(non common).
See PxcgetThreadContext.
Not thread-safe, so remember to have one object per thread!
TODO! refactor this and rename(it is a general per thread cache). Move transform cache into its own class.
*/
class ThreadContext :
public PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool>::EntryBase
{
PX_NOCOPY(ThreadContext)
public:
#if PX_ENABLE_SIM_STATS
struct ThreadSimStats
{
void clear()
{
numActiveConstraints = 0;
numActiveDynamicBodies = 0;
numActiveKinematicBodies = 0;
numAxisSolverConstraints = 0;
}
PxU32 numActiveConstraints;
PxU32 numActiveDynamicBodies;
PxU32 numActiveKinematicBodies;
PxU32 numAxisSolverConstraints;
};
#endif
//TODO: tune cache size based on number of active objects.
ThreadContext(PxcNpMemBlockPool* memBlockPool);
void reset();
void resizeArrays(PxU32 frictionConstraintDescCount, PxU32 articulationCount);
PX_FORCE_INLINE Ps::Array<ArticulationSolverDesc>& getArticulations() { return mArticulations; }
#if PX_ENABLE_SIM_STATS
PX_FORCE_INLINE ThreadSimStats& getSimStats()
{
return mThreadSimStats;
}
#endif
Gu::ContactBuffer mContactBuffer;
// temporary buffer for correlation
PX_ALIGN(16, CorrelationBuffer mCorrelationBuffer);
FrictionPatchStreamPair mFrictionPatchStreamPair; // patch streams
PxsConstraintBlockManager mConstraintBlockManager; // for when this thread context is "lead" on an island
PxcConstraintBlockStream mConstraintBlockStream; // constraint block pool
// this stuff is just used for reformatting the solver data. Hopefully we should have a more
// sane format for this when the dust settles - so it's just temporary. If we keep this around
// here we should move these from public to private
PxU32 mNumDifferentBodyConstraints;
PxU32 mNumDifferentBodyFrictionConstraints;
PxU32 mNumSelfConstraints;
PxU32 mNumStaticConstraints;
PxU32 mNumSelfFrictionConstraints;
PxU32 mNumSelfConstraintFrictionBlocks;
Ps::Array<PxU32> mConstraintsPerPartition;
Ps::Array<PxU32> mFrictionConstraintsPerPartition;
Ps::Array<PxU32> mPartitionNormalizationBitmap;
PxsBodyCore** mBodyCoreArray;
PxsRigidBody** mRigidBodyArray;
ArticulationV** mArticulationArray;
Cm::SpatialVector* motionVelocityArray;
PxU32* bodyRemapTable;
PxU32* mNodeIndexArray;
//Constraint info for normal constraint sovler
PxSolverConstraintDesc* contactConstraintDescArray;
PxU32 contactDescArraySize;
PxSolverConstraintDesc* orderedContactConstraints;
PxConstraintBatchHeader* contactConstraintBatchHeaders;
PxU32 numContactConstraintBatches;
//Constraint info for partitioning
PxSolverConstraintDesc* tempConstraintDescArray;
//Additional constraint info for 1d/2d friction model
Ps::Array<PxSolverConstraintDesc> frictionConstraintDescArray;
Ps::Array<PxConstraintBatchHeader> frictionConstraintBatchHeaders;
//Info for tracking compound contact managers (temporary data - could use scratch memory!)
Ps::Array<CompoundContactManager> compoundConstraints;
//Used for sorting constraints. Temporary, could use scratch memory
Ps::Array<const PxsIndexedContactManager*> orderedContactList;
Ps::Array<const PxsIndexedContactManager*> tempContactList;
Ps::Array<PxU32> sortIndexArray;
Ps::Array<Cm::SpatialVectorF> mZVector;
Ps::Array<Cm::SpatialVectorF> mDeltaV;
PxU32 numDifferentBodyBatchHeaders;
PxU32 numSelfConstraintBatchHeaders;
PxU32 mOrderedContactDescCount;
PxU32 mOrderedFrictionDescCount;
PxU32 mConstraintSize;
PxU32 mAxisConstraintCount;
SelfConstraintBlock* mSelfConstraintBlocks;
SelfConstraintBlock* mSelfConstraintFrictionBlocks;
PxU32 mMaxPartitions;
PxU32 mMaxFrictionPartitions;
PxU32 mMaxSolverPositionIterations;
PxU32 mMaxSolverVelocityIterations;
PxU32 mMaxArticulationLength;
PxU32 mMaxArticulationSolverLength;
PxU32 mMaxArticulationLinks;
PxSolverConstraintDesc* mContactDescPtr;
PxSolverConstraintDesc* mStartContactDescPtr;
PxSolverConstraintDesc* mFrictionDescPtr;
private:
Ps::Array<ArticulationSolverDesc> mArticulations;
#if PX_ENABLE_SIM_STATS
ThreadSimStats mThreadSimStats;
#endif
public:
};
}
}
#endif //DY_THREADCONTEXT_H

View File

@ -0,0 +1,68 @@
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxMemory.h"
#include "DyThresholdTable.h"
#include "PsHash.h"
#include "PsUtilities.h"
#include "PsAllocator.h"
namespace physx
{
namespace Dy
{
bool ThresholdTable::check(const ThresholdStream& stream, const PxU32 nodeIndexA, const PxU32 nodeIndexB, PxReal dt)
{
PxU32* PX_RESTRICT hashes = mHash;
PxU32* PX_RESTRICT nextIndices = mNexts;
Pair* PX_RESTRICT pairs = mPairs;
/*const PxsRigidBody* b0 = PxMin(body0, body1);
const PxsRigidBody* b1 = PxMax(body0, body1);*/
const PxU32 nA = PxMin(nodeIndexA, nodeIndexB);
const PxU32 nB = PxMax(nodeIndexA, nodeIndexB);
PxU32 hashKey = computeHashKey(nodeIndexA, nodeIndexB, mHashSize);
PxU32 pairIndex = hashes[hashKey];
while(NO_INDEX != pairIndex)
{
Pair& pair = pairs[pairIndex];
const PxU32 thresholdStreamIndex = pair.thresholdStreamIndex;
PX_ASSERT(thresholdStreamIndex < stream.size());
const ThresholdStreamElement& otherElement = stream[thresholdStreamIndex];
if(otherElement.nodeIndexA.index()==nA && otherElement.nodeIndexB.index()==nB)
return (pair.accumulatedForce > (otherElement.threshold * dt));
pairIndex = nextIndices[pairIndex];
}
return false;
}
}
}