881 lines
27 KiB
C++
881 lines
27 KiB
C++
|
/*************************************************************************
|
||
|
* *
|
||
|
* Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. *
|
||
|
* All rights reserved. Email: russ@q12.org Web: www.q12.org *
|
||
|
* *
|
||
|
* This library is free software; you can redistribute it and/or *
|
||
|
* modify it under the terms of EITHER: *
|
||
|
* (1) The GNU Lesser General Public License as published by the Free *
|
||
|
* Software Foundation; either version 2.1 of the License, or (at *
|
||
|
* your option) any later version. The text of the GNU Lesser *
|
||
|
* General Public License is included with this library in the *
|
||
|
* file LICENSE.TXT. *
|
||
|
* (2) The BSD-style license that is included with this library in *
|
||
|
* the file LICENSE-BSD.TXT. *
|
||
|
* *
|
||
|
* This library is distributed in the hope that it will be useful, *
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
|
||
|
* LICENSE.TXT and LICENSE-BSD.TXT for more details. *
|
||
|
* *
|
||
|
*************************************************************************/
|
||
|
|
||
|
#include "objects.h"
|
||
|
#include "joint.h"
|
||
|
#include <ode/config.h>
|
||
|
#include <ode/odemath.h>
|
||
|
#include <ode/rotation.h>
|
||
|
#include <ode/timer.h>
|
||
|
#include <ode/error.h>
|
||
|
#include <ode/matrix.h>
|
||
|
#include <ode/misc.h>
|
||
|
#include "lcp.h"
|
||
|
#include "util.h"
|
||
|
|
||
|
#define ALLOCA dALLOCA16
|
||
|
|
||
|
typedef const dReal *dRealPtr;
|
||
|
typedef dReal *dRealMutablePtr;
|
||
|
#define dRealArray(name,n) dReal name[n];
|
||
|
#define dRealAllocaArray(name,n) dReal *name = (dReal*) ALLOCA ((n)*sizeof(dReal));
|
||
|
|
||
|
//***************************************************************************
|
||
|
// configuration
|
||
|
|
||
|
// for the SOR and CG methods:
|
||
|
// uncomment the following line to use warm starting. this definitely
|
||
|
// help for motor-driven joints. unfortunately it appears to hurt
|
||
|
// with high-friction contacts using the SOR method. use with care
|
||
|
|
||
|
//#define WARM_STARTING 1
|
||
|
|
||
|
|
||
|
// for the SOR method:
|
||
|
// uncomment the following line to determine a new constraint-solving
|
||
|
// order for each iteration. however, the qsort per iteration is expensive,
|
||
|
// and the optimal order is somewhat problem dependent.
|
||
|
// @@@ try the leaf->root ordering.
|
||
|
|
||
|
//#define REORDER_CONSTRAINTS 1
|
||
|
|
||
|
|
||
|
// for the SOR method:
|
||
|
// uncomment the following line to randomly reorder constraint rows
|
||
|
// during the solution. depending on the situation, this can help a lot
|
||
|
// or hardly at all, but it doesn't seem to hurt.
|
||
|
|
||
|
#define RANDOMLY_REORDER_CONSTRAINTS 1
|
||
|
|
||
|
//****************************************************************************
|
||
|
// special matrix multipliers
|
||
|
|
||
|
// multiply block of B matrix (q x 6) with 12 dReal per row with C vektor (q)
|
||
|
static void Multiply1_12q1 (dReal *A, dReal *B, dReal *C, int q)
|
||
|
{
|
||
|
int k;
|
||
|
dReal sum;
|
||
|
dIASSERT (q>0 && A && B && C);
|
||
|
sum = 0;
|
||
|
for (k=0; k<q; k++) sum += B[k*12] * C[k];
|
||
|
A[0] = sum;
|
||
|
sum = 0;
|
||
|
for (k=0; k<q; k++) sum += B[1+k*12] * C[k];
|
||
|
A[1] = sum;
|
||
|
sum = 0;
|
||
|
for (k=0; k<q; k++) sum += B[2+k*12] * C[k];
|
||
|
A[2] = sum;
|
||
|
sum = 0;
|
||
|
for (k=0; k<q; k++) sum += B[3+k*12] * C[k];
|
||
|
A[3] = sum;
|
||
|
sum = 0;
|
||
|
for (k=0; k<q; k++) sum += B[4+k*12] * C[k];
|
||
|
A[4] = sum;
|
||
|
sum = 0;
|
||
|
for (k=0; k<q; k++) sum += B[5+k*12] * C[k];
|
||
|
A[5] = sum;
|
||
|
}
|
||
|
|
||
|
//***************************************************************************
|
||
|
// testing stuff
|
||
|
|
||
|
#ifdef TIMING
|
||
|
#define IFTIMING(x) x
|
||
|
#else
|
||
|
#define IFTIMING(x) /* */
|
||
|
#endif
|
||
|
|
||
|
//***************************************************************************
|
||
|
// various common computations involving the matrix J
|
||
|
|
||
|
// compute iMJ = inv(M)*J'
|
||
|
|
||
|
static void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb,
|
||
|
dxBody * const *body, dRealPtr invI)
|
||
|
{
|
||
|
int i,j;
|
||
|
dRealMutablePtr iMJ_ptr = iMJ;
|
||
|
dRealMutablePtr J_ptr = J;
|
||
|
for (i=0; i<m; i++) {
|
||
|
int b1 = jb[i*2];
|
||
|
int b2 = jb[i*2+1];
|
||
|
dReal k = body[b1]->invMass;
|
||
|
for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j];
|
||
|
dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3);
|
||
|
if (b2 >= 0) {
|
||
|
k = body[b2]->invMass;
|
||
|
for (j=0; j<3; j++) iMJ_ptr[j+6] = k*J_ptr[j+6];
|
||
|
dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9);
|
||
|
}
|
||
|
J_ptr += 12;
|
||
|
iMJ_ptr += 12;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// compute out = inv(M)*J'*in.
|
||
|
|
||
|
static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb,
|
||
|
dRealMutablePtr in, dRealMutablePtr out)
|
||
|
{
|
||
|
int i,j;
|
||
|
dSetZero (out,6*nb);
|
||
|
dRealPtr iMJ_ptr = iMJ;
|
||
|
for (i=0; i<m; i++) {
|
||
|
int b1 = jb[i*2];
|
||
|
int b2 = jb[i*2+1];
|
||
|
dRealMutablePtr out_ptr = out + b1*6;
|
||
|
for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i];
|
||
|
iMJ_ptr += 6;
|
||
|
if (b2 >= 0) {
|
||
|
out_ptr = out + b2*6;
|
||
|
for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i];
|
||
|
}
|
||
|
iMJ_ptr += 6;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// compute out = J*in.
|
||
|
|
||
|
static void multiply_J (int m, dRealMutablePtr J, int *jb,
|
||
|
dRealMutablePtr in, dRealMutablePtr out)
|
||
|
{
|
||
|
int i,j;
|
||
|
dRealPtr J_ptr = J;
|
||
|
for (i=0; i<m; i++) {
|
||
|
int b1 = jb[i*2];
|
||
|
int b2 = jb[i*2+1];
|
||
|
dReal sum = 0;
|
||
|
dRealMutablePtr in_ptr = in + b1*6;
|
||
|
for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j];
|
||
|
J_ptr += 6;
|
||
|
if (b2 >= 0) {
|
||
|
in_ptr = in + b2*6;
|
||
|
for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j];
|
||
|
}
|
||
|
J_ptr += 6;
|
||
|
out[i] = sum;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// compute out = (J*inv(M)*J' + cfm)*in.
|
||
|
// use z as an nb*6 temporary.
|
||
|
|
||
|
static void multiply_J_invM_JT (int m, int nb, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb,
|
||
|
dRealPtr cfm, dRealMutablePtr z, dRealMutablePtr in, dRealMutablePtr out)
|
||
|
{
|
||
|
multiply_invM_JT (m,nb,iMJ,jb,in,z);
|
||
|
multiply_J (m,J,jb,z,out);
|
||
|
|
||
|
// add cfm
|
||
|
for (int i=0; i<m; i++) out[i] += cfm[i] * in[i];
|
||
|
}
|
||
|
|
||
|
//***************************************************************************
|
||
|
// conjugate gradient method with jacobi preconditioner
|
||
|
// THIS IS EXPERIMENTAL CODE that doesn't work too well, so it is ifdefed out.
|
||
|
//
|
||
|
// adding CFM seems to be critically important to this method.
|
||
|
|
||
|
#if 0
|
||
|
|
||
|
static inline dReal dot (int n, dRealPtr x, dRealPtr y)
|
||
|
{
|
||
|
dReal sum=0;
|
||
|
for (int i=0; i<n; i++) sum += x[i]*y[i];
|
||
|
return sum;
|
||
|
}
|
||
|
|
||
|
|
||
|
// x = y + z*alpha
|
||
|
|
||
|
static inline void add (int n, dRealMutablePtr x, dRealPtr y, dRealPtr z, dReal alpha)
|
||
|
{
|
||
|
for (int i=0; i<n; i++) x[i] = y[i] + z[i]*alpha;
|
||
|
}
|
||
|
|
||
|
|
||
|
static void CG_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body,
|
||
|
dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b,
|
||
|
dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex,
|
||
|
dxQuickStepParameters *qs)
|
||
|
{
|
||
|
int i,j;
|
||
|
const int num_iterations = qs->num_iterations;
|
||
|
|
||
|
// precompute iMJ = inv(M)*J'
|
||
|
dRealAllocaArray (iMJ,m*12);
|
||
|
compute_invM_JT (m,J,iMJ,jb,body,invI);
|
||
|
|
||
|
dReal last_rho = 0;
|
||
|
dRealAllocaArray (r,m);
|
||
|
dRealAllocaArray (z,m);
|
||
|
dRealAllocaArray (p,m);
|
||
|
dRealAllocaArray (q,m);
|
||
|
|
||
|
// precompute 1 / diagonals of A
|
||
|
dRealAllocaArray (Ad,m);
|
||
|
dRealPtr iMJ_ptr = iMJ;
|
||
|
dRealPtr J_ptr = J;
|
||
|
for (i=0; i<m; i++) {
|
||
|
dReal sum = 0;
|
||
|
for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j];
|
||
|
if (jb[i*2+1] >= 0) {
|
||
|
for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j];
|
||
|
}
|
||
|
iMJ_ptr += 12;
|
||
|
J_ptr += 12;
|
||
|
Ad[i] = REAL(1.0) / (sum + cfm[i]);
|
||
|
}
|
||
|
|
||
|
#ifdef WARM_STARTING
|
||
|
// compute residual r = b - A*lambda
|
||
|
multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
|
||
|
for (i=0; i<m; i++) r[i] = b[i] - r[i];
|
||
|
#else
|
||
|
dSetZero (lambda,m);
|
||
|
memcpy (r,b,m*sizeof(dReal)); // residual r = b - A*lambda
|
||
|
#endif
|
||
|
|
||
|
for (int iteration=0; iteration < num_iterations; iteration++) {
|
||
|
for (i=0; i<m; i++) z[i] = r[i]*Ad[i]; // z = inv(M)*r
|
||
|
dReal rho = dot (m,r,z); // rho = r'*z
|
||
|
|
||
|
// @@@
|
||
|
// we must check for convergence, otherwise rho will go to 0 if
|
||
|
// we get an exact solution, which will introduce NaNs into the equations.
|
||
|
if (rho < 1e-10) {
|
||
|
printf ("CG returned at iteration %d\n",iteration);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (iteration==0) {
|
||
|
memcpy (p,z,m*sizeof(dReal)); // p = z
|
||
|
}
|
||
|
else {
|
||
|
add (m,p,z,p,rho/last_rho); // p = z + (rho/last_rho)*p
|
||
|
}
|
||
|
|
||
|
// compute q = (J*inv(M)*J')*p
|
||
|
multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,p,q);
|
||
|
|
||
|
dReal alpha = rho/dot (m,p,q); // alpha = rho/(p'*q)
|
||
|
add (m,lambda,lambda,p,alpha); // lambda = lambda + alpha*p
|
||
|
add (m,r,r,q,-alpha); // r = r - alpha*q
|
||
|
last_rho = rho;
|
||
|
}
|
||
|
|
||
|
// compute fc = inv(M)*J'*lambda
|
||
|
multiply_invM_JT (m,nb,iMJ,jb,lambda,fc);
|
||
|
|
||
|
#if 0
|
||
|
// measure solution error
|
||
|
multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
|
||
|
dReal error = 0;
|
||
|
for (i=0; i<m; i++) error += dFabs(r[i] - b[i]);
|
||
|
printf ("lambda error = %10.6e\n",error);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
|
||
|
//***************************************************************************
|
||
|
// SOR-LCP method
|
||
|
|
||
|
// nb is the number of bodies in the body array.
|
||
|
// J is an m*12 matrix of constraint rows
|
||
|
// jb is an array of first and second body numbers for each constraint row
|
||
|
// invI is the global frame inverse inertia for each body (stacked 3x3 matrices)
|
||
|
//
|
||
|
// this returns lambda and fc (the constraint force).
|
||
|
// note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda
|
||
|
//
|
||
|
// b, lo and hi are modified on exit
|
||
|
|
||
|
|
||
|
struct IndexError {
|
||
|
dReal error; // error to sort on
|
||
|
int findex;
|
||
|
int index; // row index
|
||
|
};
|
||
|
|
||
|
|
||
|
#ifdef REORDER_CONSTRAINTS
|
||
|
|
||
|
static int compare_index_error (const void *a, const void *b)
|
||
|
{
|
||
|
const IndexError *i1 = (IndexError*) a;
|
||
|
const IndexError *i2 = (IndexError*) b;
|
||
|
if (i1->findex < 0 && i2->findex >= 0) return -1;
|
||
|
if (i1->findex >= 0 && i2->findex < 0) return 1;
|
||
|
if (i1->error < i2->error) return -1;
|
||
|
if (i1->error > i2->error) return 1;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
|
||
|
|
||
|
static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body,
|
||
|
dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b,
|
||
|
dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex,
|
||
|
dxQuickStepParameters *qs)
|
||
|
{
|
||
|
const int num_iterations = qs->num_iterations;
|
||
|
const dReal sor_w = qs->w; // SOR over-relaxation parameter
|
||
|
|
||
|
int i,j;
|
||
|
|
||
|
#ifdef WARM_STARTING
|
||
|
// for warm starting, this seems to be necessary to prevent
|
||
|
// jerkiness in motor-driven joints. i have no idea why this works.
|
||
|
for (i=0; i<m; i++) lambda[i] *= 0.9;
|
||
|
#else
|
||
|
dSetZero (lambda,m);
|
||
|
#endif
|
||
|
|
||
|
#ifdef REORDER_CONSTRAINTS
|
||
|
// the lambda computed at the previous iteration.
|
||
|
// this is used to measure error for when we are reordering the indexes.
|
||
|
dRealAllocaArray (last_lambda,m);
|
||
|
#endif
|
||
|
|
||
|
// a copy of the 'hi' vector in case findex[] is being used
|
||
|
dRealAllocaArray (hicopy,m);
|
||
|
memcpy (hicopy,hi,m*sizeof(dReal));
|
||
|
|
||
|
// precompute iMJ = inv(M)*J'
|
||
|
dRealAllocaArray (iMJ,m*12);
|
||
|
compute_invM_JT (m,J,iMJ,jb,body,invI);
|
||
|
|
||
|
// compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc
|
||
|
// as we change lambda.
|
||
|
#ifdef WARM_STARTING
|
||
|
multiply_invM_JT (m,nb,iMJ,jb,lambda,fc);
|
||
|
#else
|
||
|
dSetZero (fc,nb*6);
|
||
|
#endif
|
||
|
|
||
|
// precompute 1 / diagonals of A
|
||
|
dRealAllocaArray (Ad,m);
|
||
|
dRealPtr iMJ_ptr = iMJ;
|
||
|
dRealMutablePtr J_ptr = J;
|
||
|
for (i=0; i<m; i++) {
|
||
|
dReal sum = 0;
|
||
|
for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j];
|
||
|
if (jb[i*2+1] >= 0) {
|
||
|
for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j];
|
||
|
}
|
||
|
iMJ_ptr += 12;
|
||
|
J_ptr += 12;
|
||
|
Ad[i] = sor_w / (sum + cfm[i]);
|
||
|
}
|
||
|
|
||
|
// scale J and b by Ad
|
||
|
J_ptr = J;
|
||
|
for (i=0; i<m; i++) {
|
||
|
for (j=0; j<12; j++) {
|
||
|
J_ptr[0] *= Ad[i];
|
||
|
J_ptr++;
|
||
|
}
|
||
|
b[i] *= Ad[i];
|
||
|
|
||
|
// scale Ad by CFM. N.B. this should be done last since it is used above
|
||
|
Ad[i] *= cfm[i];
|
||
|
}
|
||
|
|
||
|
// order to solve constraint rows in
|
||
|
IndexError *order = (IndexError*) ALLOCA (m*sizeof(IndexError));
|
||
|
|
||
|
#ifndef REORDER_CONSTRAINTS
|
||
|
// make sure constraints with findex < 0 come first.
|
||
|
j=0;
|
||
|
int k=1;
|
||
|
|
||
|
// Fill the array from both ends
|
||
|
for (i=0; i<m; i++)
|
||
|
if (findex[i] < 0)
|
||
|
order[j++].index = i; // Place them at the front
|
||
|
else
|
||
|
order[m-k++].index = i; // Place them at the end
|
||
|
|
||
|
dIASSERT ((j+k-1)==m); // -1 since k was started at 1 and not 0
|
||
|
#endif
|
||
|
|
||
|
for (int iteration=0; iteration < num_iterations; iteration++) {
|
||
|
|
||
|
#ifdef REORDER_CONSTRAINTS
|
||
|
// constraints with findex < 0 always come first.
|
||
|
if (iteration < 2) {
|
||
|
// for the first two iterations, solve the constraints in
|
||
|
// the given order
|
||
|
for (i=0; i<m; i++) {
|
||
|
order[i].error = i;
|
||
|
order[i].findex = findex[i];
|
||
|
order[i].index = i;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
// sort the constraints so that the ones converging slowest
|
||
|
// get solved last. use the absolute (not relative) error.
|
||
|
for (i=0; i<m; i++) {
|
||
|
dReal v1 = dFabs (lambda[i]);
|
||
|
dReal v2 = dFabs (last_lambda[i]);
|
||
|
dReal max = (v1 > v2) ? v1 : v2;
|
||
|
if (max > 0) {
|
||
|
//@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max;
|
||
|
order[i].error = dFabs(lambda[i]-last_lambda[i]);
|
||
|
}
|
||
|
else {
|
||
|
order[i].error = dInfinity;
|
||
|
}
|
||
|
order[i].findex = findex[i];
|
||
|
order[i].index = i;
|
||
|
}
|
||
|
}
|
||
|
qsort (order,m,sizeof(IndexError),&compare_index_error);
|
||
|
|
||
|
//@@@ potential optimization: swap lambda and last_lambda pointers rather
|
||
|
// than copying the data. we must make sure lambda is properly
|
||
|
// returned to the caller
|
||
|
memcpy (last_lambda,lambda,m*sizeof(dReal));
|
||
|
#endif
|
||
|
#ifdef RANDOMLY_REORDER_CONSTRAINTS
|
||
|
if ((iteration & 7) == 0) {
|
||
|
for (i=1; i<m; ++i) {
|
||
|
IndexError tmp = order[i];
|
||
|
int swapi = dRandInt(i+1);
|
||
|
order[i] = order[swapi];
|
||
|
order[swapi] = tmp;
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
for (int i=0; i<m; i++) {
|
||
|
// @@@ potential optimization: we could pre-sort J and iMJ, thereby
|
||
|
// linearizing access to those arrays. hmmm, this does not seem
|
||
|
// like a win, but we should think carefully about our memory
|
||
|
// access pattern.
|
||
|
|
||
|
int index = order[i].index;
|
||
|
J_ptr = J + index*12;
|
||
|
iMJ_ptr = iMJ + index*12;
|
||
|
|
||
|
// set the limits for this constraint. note that 'hicopy' is used.
|
||
|
// this is the place where the QuickStep method differs from the
|
||
|
// direct LCP solving method, since that method only performs this
|
||
|
// limit adjustment once per time step, whereas this method performs
|
||
|
// once per iteration per constraint row.
|
||
|
// the constraints are ordered so that all lambda[] values needed have
|
||
|
// already been computed.
|
||
|
if (findex[index] >= 0) {
|
||
|
hi[index] = dFabs (hicopy[index] * lambda[findex[index]]);
|
||
|
lo[index] = -hi[index];
|
||
|
}
|
||
|
|
||
|
int b1 = jb[index*2];
|
||
|
int b2 = jb[index*2+1];
|
||
|
dReal delta = b[index] - lambda[index]*Ad[index];
|
||
|
dRealMutablePtr fc_ptr = fc + 6*b1;
|
||
|
|
||
|
// @@@ potential optimization: SIMD-ize this and the b2 >= 0 case
|
||
|
delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] +
|
||
|
fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] +
|
||
|
fc_ptr[4] * J_ptr[4] + fc_ptr[5] * J_ptr[5];
|
||
|
// @@@ potential optimization: handle 1-body constraints in a separate
|
||
|
// loop to avoid the cost of test & jump?
|
||
|
if (b2 >= 0) {
|
||
|
fc_ptr = fc + 6*b2;
|
||
|
delta -=fc_ptr[0] * J_ptr[6] + fc_ptr[1] * J_ptr[7] +
|
||
|
fc_ptr[2] * J_ptr[8] + fc_ptr[3] * J_ptr[9] +
|
||
|
fc_ptr[4] * J_ptr[10] + fc_ptr[5] * J_ptr[11];
|
||
|
}
|
||
|
|
||
|
// compute lambda and clamp it to [lo,hi].
|
||
|
// @@@ potential optimization: does SSE have clamping instructions
|
||
|
// to save test+jump penalties here?
|
||
|
dReal new_lambda = lambda[index] + delta;
|
||
|
if (new_lambda < lo[index]) {
|
||
|
delta = lo[index]-lambda[index];
|
||
|
lambda[index] = lo[index];
|
||
|
}
|
||
|
else if (new_lambda > hi[index]) {
|
||
|
delta = hi[index]-lambda[index];
|
||
|
lambda[index] = hi[index];
|
||
|
}
|
||
|
else {
|
||
|
lambda[index] = new_lambda;
|
||
|
}
|
||
|
|
||
|
//@@@ a trick that may or may not help
|
||
|
//dReal ramp = (1-((dReal)(iteration+1)/(dReal)num_iterations));
|
||
|
//delta *= ramp;
|
||
|
|
||
|
// update fc.
|
||
|
// @@@ potential optimization: SIMD for this and the b2 >= 0 case
|
||
|
fc_ptr = fc + 6*b1;
|
||
|
fc_ptr[0] += delta * iMJ_ptr[0];
|
||
|
fc_ptr[1] += delta * iMJ_ptr[1];
|
||
|
fc_ptr[2] += delta * iMJ_ptr[2];
|
||
|
fc_ptr[3] += delta * iMJ_ptr[3];
|
||
|
fc_ptr[4] += delta * iMJ_ptr[4];
|
||
|
fc_ptr[5] += delta * iMJ_ptr[5];
|
||
|
// @@@ potential optimization: handle 1-body constraints in a separate
|
||
|
// loop to avoid the cost of test & jump?
|
||
|
if (b2 >= 0) {
|
||
|
fc_ptr = fc + 6*b2;
|
||
|
fc_ptr[0] += delta * iMJ_ptr[6];
|
||
|
fc_ptr[1] += delta * iMJ_ptr[7];
|
||
|
fc_ptr[2] += delta * iMJ_ptr[8];
|
||
|
fc_ptr[3] += delta * iMJ_ptr[9];
|
||
|
fc_ptr[4] += delta * iMJ_ptr[10];
|
||
|
fc_ptr[5] += delta * iMJ_ptr[11];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
void dxQuickStepper (dxWorld *world, dxBody * const *body, int nb,
|
||
|
dxJoint * const *_joint, int nj, dReal stepsize)
|
||
|
{
|
||
|
int i,j;
|
||
|
IFTIMING(dTimerStart("preprocessing");)
|
||
|
|
||
|
dReal stepsize1 = dRecip(stepsize);
|
||
|
|
||
|
// number all bodies in the body list - set their tag values
|
||
|
for (i=0; i<nb; i++) body[i]->tag = i;
|
||
|
|
||
|
// make a local copy of the joint array, because we might want to modify it.
|
||
|
// (the "dxJoint *const*" declaration says we're allowed to modify the joints
|
||
|
// but not the joint array, because the caller might need it unchanged).
|
||
|
//@@@ do we really need to do this? we'll be sorting constraint rows individually, not joints
|
||
|
dxJoint **joint = (dxJoint**) ALLOCA (nj * sizeof(dxJoint*));
|
||
|
memcpy (joint,_joint,nj * sizeof(dxJoint*));
|
||
|
|
||
|
// for all bodies, compute the inertia tensor and its inverse in the global
|
||
|
// frame, and compute the rotational force and add it to the torque
|
||
|
// accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
|
||
|
//dRealAllocaArray (I,3*4*nb); // need to remember all I's for feedback purposes only
|
||
|
dRealAllocaArray (invI,3*4*nb);
|
||
|
for (i=0; i<nb; i++) {
|
||
|
dMatrix3 tmp;
|
||
|
|
||
|
// compute inverse inertia tensor in global frame
|
||
|
dMULTIPLY2_333 (tmp,body[i]->invI,body[i]->posr.R);
|
||
|
dMULTIPLY0_333 (invI+i*12,body[i]->posr.R,tmp);
|
||
|
#ifdef dGYROSCOPIC
|
||
|
dMatrix3 I;
|
||
|
// compute inertia tensor in global frame
|
||
|
dMULTIPLY2_333 (tmp,body[i]->mass.I,body[i]->posr.R);
|
||
|
//dMULTIPLY0_333 (I+i*12,body[i]->posr.R,tmp);
|
||
|
dMULTIPLY0_333 (I,body[i]->posr.R,tmp);
|
||
|
// compute rotational force
|
||
|
//dMULTIPLY0_331 (tmp,I+i*12,body[i]->avel);
|
||
|
dMULTIPLY0_331 (tmp,I,body[i]->avel);
|
||
|
dCROSS (body[i]->tacc,-=,body[i]->avel,tmp);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
// add the gravity force to all bodies
|
||
|
for (i=0; i<nb; i++) {
|
||
|
if ((body[i]->flags & dxBodyNoGravity)==0) {
|
||
|
body[i]->facc[0] += body[i]->mass.mass * world->gravity[0];
|
||
|
body[i]->facc[1] += body[i]->mass.mass * world->gravity[1];
|
||
|
body[i]->facc[2] += body[i]->mass.mass * world->gravity[2];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// get joint information (m = total constraint dimension, nub = number of unbounded variables).
|
||
|
// joints with m=0 are inactive and are removed from the joints array
|
||
|
// entirely, so that the code that follows does not consider them.
|
||
|
//@@@ do we really need to save all the info1's
|
||
|
dxJoint::Info1 *info = (dxJoint::Info1*) ALLOCA (nj*sizeof(dxJoint::Info1));
|
||
|
for (i=0, j=0; j<nj; j++) { // i=dest, j=src
|
||
|
joint[j]->vtable->getInfo1 (joint[j],info+i);
|
||
|
dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m);
|
||
|
if (info[i].m > 0) {
|
||
|
joint[i] = joint[j];
|
||
|
i++;
|
||
|
}
|
||
|
}
|
||
|
nj = i;
|
||
|
|
||
|
// create the row offset array
|
||
|
int m = 0;
|
||
|
int *ofs = (int*) ALLOCA (nj*sizeof(int));
|
||
|
for (i=0; i<nj; i++) {
|
||
|
ofs[i] = m;
|
||
|
m += info[i].m;
|
||
|
}
|
||
|
|
||
|
// if there are constraints, compute the constraint force
|
||
|
dRealAllocaArray (J,m*12);
|
||
|
int *jb = (int*) ALLOCA (m*2*sizeof(int));
|
||
|
if (m > 0) {
|
||
|
// create a constraint equation right hand side vector `c', a constraint
|
||
|
// force mixing vector `cfm', and LCP low and high bound vectors, and an
|
||
|
// 'findex' vector.
|
||
|
dRealAllocaArray (c,m);
|
||
|
dRealAllocaArray (cfm,m);
|
||
|
dRealAllocaArray (lo,m);
|
||
|
dRealAllocaArray (hi,m);
|
||
|
int *findex = (int*) ALLOCA (m*sizeof(int));
|
||
|
dSetZero (c,m);
|
||
|
dSetValue (cfm,m,world->global_cfm);
|
||
|
dSetValue (lo,m,-dInfinity);
|
||
|
dSetValue (hi,m, dInfinity);
|
||
|
for (i=0; i<m; i++) findex[i] = -1;
|
||
|
|
||
|
// get jacobian data from constraints. an m*12 matrix will be created
|
||
|
// to store the two jacobian blocks from each constraint. it has this
|
||
|
// format:
|
||
|
//
|
||
|
// l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 \ .
|
||
|
// l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }-- jacobian for joint 0, body 1 and body 2 (3 rows)
|
||
|
// l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 /
|
||
|
// l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }--- jacobian for joint 1, body 1 and body 2 (3 rows)
|
||
|
// etc...
|
||
|
//
|
||
|
// (lll) = linear jacobian data
|
||
|
// (aaa) = angular jacobian data
|
||
|
//
|
||
|
IFTIMING (dTimerNow ("create J");)
|
||
|
dSetZero (J,m*12);
|
||
|
dxJoint::Info2 Jinfo;
|
||
|
Jinfo.rowskip = 12;
|
||
|
Jinfo.fps = stepsize1;
|
||
|
Jinfo.erp = world->global_erp;
|
||
|
int mfb = 0; // number of rows of Jacobian we will have to save for joint feedback
|
||
|
for (i=0; i<nj; i++) {
|
||
|
Jinfo.J1l = J + ofs[i]*12;
|
||
|
Jinfo.J1a = Jinfo.J1l + 3;
|
||
|
Jinfo.J2l = Jinfo.J1l + 6;
|
||
|
Jinfo.J2a = Jinfo.J1l + 9;
|
||
|
Jinfo.c = c + ofs[i];
|
||
|
Jinfo.cfm = cfm + ofs[i];
|
||
|
Jinfo.lo = lo + ofs[i];
|
||
|
Jinfo.hi = hi + ofs[i];
|
||
|
Jinfo.findex = findex + ofs[i];
|
||
|
joint[i]->vtable->getInfo2 (joint[i],&Jinfo);
|
||
|
// adjust returned findex values for global index numbering
|
||
|
for (j=0; j<info[i].m; j++) {
|
||
|
if (findex[ofs[i] + j] >= 0) findex[ofs[i] + j] += ofs[i];
|
||
|
}
|
||
|
if (joint[i]->feedback)
|
||
|
mfb += info[i].m;
|
||
|
}
|
||
|
|
||
|
// we need a copy of Jacobian for joint feedbacks
|
||
|
// because it gets destroyed by SOR solver
|
||
|
// instead of saving all Jacobian, we can save just rows
|
||
|
// for joints, that requested feedback (which is normaly much less)
|
||
|
dRealAllocaArray (Jcopy,mfb*12);
|
||
|
if (mfb > 0) {
|
||
|
mfb = 0;
|
||
|
for (i=0; i<nj; i++)
|
||
|
if (joint[i]->feedback) {
|
||
|
memcpy(Jcopy+mfb*12, J+ofs[i]*12, info[i].m*12*sizeof(dReal));
|
||
|
mfb += info[i].m;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// create an array of body numbers for each joint row
|
||
|
int *jb_ptr = jb;
|
||
|
for (i=0; i<nj; i++) {
|
||
|
int b1 = (joint[i]->node[0].body) ? (joint[i]->node[0].body->tag) : -1;
|
||
|
int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->tag) : -1;
|
||
|
for (j=0; j<info[i].m; j++) {
|
||
|
jb_ptr[0] = b1;
|
||
|
jb_ptr[1] = b2;
|
||
|
jb_ptr += 2;
|
||
|
}
|
||
|
}
|
||
|
dIASSERT (jb_ptr == jb+2*m);
|
||
|
|
||
|
// compute the right hand side `rhs'
|
||
|
IFTIMING (dTimerNow ("compute rhs");)
|
||
|
dRealAllocaArray (tmp1,nb*6);
|
||
|
// put v/h + invM*fe into tmp1
|
||
|
for (i=0; i<nb; i++) {
|
||
|
dReal body_invMass = body[i]->invMass;
|
||
|
for (j=0; j<3; j++) tmp1[i*6+j] = body[i]->facc[j] * body_invMass + body[i]->lvel[j] * stepsize1;
|
||
|
dMULTIPLY0_331 (tmp1 + i*6 + 3,invI + i*12,body[i]->tacc);
|
||
|
for (j=0; j<3; j++) tmp1[i*6+3+j] += body[i]->avel[j] * stepsize1;
|
||
|
}
|
||
|
|
||
|
// put J*tmp1 into rhs
|
||
|
dRealAllocaArray (rhs,m);
|
||
|
multiply_J (m,J,jb,tmp1,rhs);
|
||
|
|
||
|
// complete rhs
|
||
|
for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i];
|
||
|
|
||
|
// scale CFM
|
||
|
for (i=0; i<m; i++) cfm[i] *= stepsize1;
|
||
|
|
||
|
// load lambda from the value saved on the previous iteration
|
||
|
dRealAllocaArray (lambda,m);
|
||
|
#ifdef WARM_STARTING
|
||
|
dSetZero (lambda,m); //@@@ shouldn't be necessary
|
||
|
for (i=0; i<nj; i++) {
|
||
|
memcpy (lambda+ofs[i],joint[i]->lambda,info[i].m * sizeof(dReal));
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
// solve the LCP problem and get lambda and invM*constraint_force
|
||
|
IFTIMING (dTimerNow ("solving LCP problem");)
|
||
|
dRealAllocaArray (cforce,nb*6);
|
||
|
SOR_LCP (m,nb,J,jb,body,invI,lambda,cforce,rhs,lo,hi,cfm,findex,&world->qs);
|
||
|
|
||
|
#ifdef WARM_STARTING
|
||
|
// save lambda for the next iteration
|
||
|
//@@@ note that this doesn't work for contact joints yet, as they are
|
||
|
// recreated every iteration
|
||
|
for (i=0; i<nj; i++) {
|
||
|
memcpy (joint[i]->lambda,lambda+ofs[i],info[i].m * sizeof(dReal));
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
// note that the SOR method overwrites rhs and J at this point, so
|
||
|
// they should not be used again.
|
||
|
|
||
|
// add stepsize * cforce to the body velocity
|
||
|
for (i=0; i<nb; i++) {
|
||
|
for (j=0; j<3; j++) body[i]->lvel[j] += stepsize * cforce[i*6+j];
|
||
|
for (j=0; j<3; j++) body[i]->avel[j] += stepsize * cforce[i*6+3+j];
|
||
|
}
|
||
|
|
||
|
// if joint feedback is requested, compute the constraint force.
|
||
|
// BUT: cforce is inv(M)*J'*lambda, whereas we want just J'*lambda,
|
||
|
// so we must compute M*cforce.
|
||
|
// @@@ if any joint has a feedback request we compute the entire
|
||
|
// adjusted cforce, which is not the most efficient way to do it.
|
||
|
/*for (j=0; j<nj; j++) {
|
||
|
if (joint[j]->feedback) {
|
||
|
// compute adjusted cforce
|
||
|
for (i=0; i<nb; i++) {
|
||
|
dReal k = body[i]->mass.mass;
|
||
|
cforce [i*6+0] *= k;
|
||
|
cforce [i*6+1] *= k;
|
||
|
cforce [i*6+2] *= k;
|
||
|
dVector3 tmp;
|
||
|
dMULTIPLY0_331 (tmp, I + 12*i, cforce + i*6 + 3);
|
||
|
cforce [i*6+3] = tmp[0];
|
||
|
cforce [i*6+4] = tmp[1];
|
||
|
cforce [i*6+5] = tmp[2];
|
||
|
}
|
||
|
// compute feedback for this and all remaining joints
|
||
|
for (; j<nj; j++) {
|
||
|
dJointFeedback *fb = joint[j]->feedback;
|
||
|
if (fb) {
|
||
|
int b1 = joint[j]->node[0].body->tag;
|
||
|
memcpy (fb->f1,cforce+b1*6,3*sizeof(dReal));
|
||
|
memcpy (fb->t1,cforce+b1*6+3,3*sizeof(dReal));
|
||
|
if (joint[j]->node[1].body) {
|
||
|
int b2 = joint[j]->node[1].body->tag;
|
||
|
memcpy (fb->f2,cforce+b2*6,3*sizeof(dReal));
|
||
|
memcpy (fb->t2,cforce+b2*6+3,3*sizeof(dReal));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}*/
|
||
|
|
||
|
if (mfb > 0) {
|
||
|
// straightforward computation of joint constraint forces:
|
||
|
// multiply related lambdas with respective J' block for joints
|
||
|
// where feedback was requested
|
||
|
mfb = 0;
|
||
|
for (i=0; i<nj; i++) {
|
||
|
if (joint[i]->feedback) {
|
||
|
dJointFeedback *fb = joint[i]->feedback;
|
||
|
dReal data[6];
|
||
|
Multiply1_12q1 (data, Jcopy+mfb*12, lambda+ofs[i], info[i].m);
|
||
|
fb->f1[0] = data[0];
|
||
|
fb->f1[1] = data[1];
|
||
|
fb->f1[2] = data[2];
|
||
|
fb->t1[0] = data[3];
|
||
|
fb->t1[1] = data[4];
|
||
|
fb->t1[2] = data[5];
|
||
|
if (joint[i]->node[1].body)
|
||
|
{
|
||
|
Multiply1_12q1 (data, Jcopy+mfb*12+6, lambda+ofs[i], info[i].m);
|
||
|
fb->f2[0] = data[0];
|
||
|
fb->f2[1] = data[1];
|
||
|
fb->f2[2] = data[2];
|
||
|
fb->t2[0] = data[3];
|
||
|
fb->t2[1] = data[4];
|
||
|
fb->t2[2] = data[5];
|
||
|
}
|
||
|
mfb += info[i].m;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// compute the velocity update:
|
||
|
// add stepsize * invM * fe to the body velocity
|
||
|
|
||
|
IFTIMING (dTimerNow ("compute velocity update");)
|
||
|
for (i=0; i<nb; i++) {
|
||
|
dReal body_invMass = body[i]->invMass;
|
||
|
for (j=0; j<3; j++) body[i]->lvel[j] += stepsize * body_invMass * body[i]->facc[j];
|
||
|
for (j=0; j<3; j++) body[i]->tacc[j] *= stepsize;
|
||
|
dMULTIPLYADD0_331 (body[i]->avel,invI + i*12,body[i]->tacc);
|
||
|
}
|
||
|
|
||
|
#if 0
|
||
|
// check that the updated velocity obeys the constraint (this check needs unmodified J)
|
||
|
dRealAllocaArray (vel,nb*6);
|
||
|
for (i=0; i<nb; i++) {
|
||
|
for (j=0; j<3; j++) vel[i*6+j] = body[i]->lvel[j];
|
||
|
for (j=0; j<3; j++) vel[i*6+3+j] = body[i]->avel[j];
|
||
|
}
|
||
|
dRealAllocaArray (tmp,m);
|
||
|
multiply_J (m,J,jb,vel,tmp);
|
||
|
dReal error = 0;
|
||
|
for (i=0; i<m; i++) error += dFabs(tmp[i]);
|
||
|
printf ("velocity error = %10.6e\n",error);
|
||
|
#endif
|
||
|
|
||
|
// update the position and orientation from the new linear/angular velocity
|
||
|
// (over the given timestep)
|
||
|
IFTIMING (dTimerNow ("update position");)
|
||
|
for (i=0; i<nb; i++) dxStepBody (body[i],stepsize);
|
||
|
|
||
|
IFTIMING (dTimerNow ("tidy up");)
|
||
|
|
||
|
// zero all force accumulators
|
||
|
for (i=0; i<nb; i++) {
|
||
|
dSetZero (body[i]->facc,3);
|
||
|
dSetZero (body[i]->tacc,3);
|
||
|
}
|
||
|
|
||
|
IFTIMING (dTimerEnd();)
|
||
|
IFTIMING (if (m > 0) dTimerReport (stdout,1);)
|
||
|
}
|