x=matrix(c(rnorm(10,1,3), rnorm(10,7,2), rnorm(10,15,4)), byrow=FALSE, nrow=10)
xc=x
for (k in 1:3) {
xc[,k]=xc[,k]-mean(xc[,k])
}
A=var(xc)
了
> A
[,1] [,2] [,3]
[1,] 10.166746 -2.619763 -6.717475
[2,] -2.619763 3.291888 3.052898
[3,] -6.717475 3.052898 22.313351
我想解PAP^T=I,其中p是3x3, p ^T是p的转置,I是3x3单位矩阵。solve(A)解出PA=I,但我想让PAP^T=I。我找不到答案,把矩阵相乘会花很长时间。有人知道如何在R或Python中实现这个吗?我也不想用P可以是A^(-1/2)这个事实。通过代码的解决方案会很好。
library(stringr)
A=matrix(c(letters[1:4], letters[6:8], letters[10:11]), nrow=3)
mult=function(A, B) {
AB=matrix(nrow=3, ncol=3)
for (r in 1:3) {
for (c in 1:3) {
AB[r,c]=str_c("(", A[r,1], ")*(", B[1,c], ")+(", A[r,2], ")*(", B[2,c], ")+(", A[r,3], ")*(", B[3,c], ")")
}
}
return(AB)
}
AB=mult(A,B)
ABAT=mult(AB, t(A))
seteq=function(A, B) {
E=matrix(nrow=3, ncol=3)
for (r in 1:3) {
for (c in 1:3) {
E[r,c]=str_c(A[r,c], "=", B[r, c])
}
}
dim(E)=c(9,1)
return(E[,1])
}
Eq=seteq(ABAT, diag(rep(1, 3)))
res=""
for (i in 1:9) {
res=str_c(res, Eq[i])
if (i!=9) res=str_c(res, ",")
}
res
给出方程
((a)*(18.9052758456522)+(d)*(0.834836569187845)+(h)*(9.76575358263375))*(a)+((a)*(0.834836569187845)+(d)*(0.555656932954595)+(h)*(1.03585433030833))*(d)+((a)*(9.76575358263375)+(d)*(1.03585433030833)+(h)*(6.84209765783974))*(h)=1,((b)*(18.9052758456522)+(f)*(0.834836569187845)+(j)*(9.76575358263375))*(a)+((b)*(0.834836569187845)+(f)*(0.555656932954595)+(j)*(1.03585433030833))*(d)+((b)*(9.76575358263375)+(f)*(1.03585433030833)+(j)*(6.84209765783974))*(h)=0,((c)*(18.9052758456522)+(g)*(0.834836569187845)+(k)*(9.76575358263375))*(a)+((c)*(0.834836569187845)+(g)*(0.555656932954595)+(k)*(1.03585433030833))*(d)+((c)*(9.76575358263375)+(g)*(1.03585433030833)+(k)*(6.84209765783974))*(h)=0,((a)*(18.9052758456522)+(d)*(0.834836569187845)+(h)*(9.76575358263375))*(b)+((a)*(0.834836569187845)+(d)*(0.555656932954595)+(h)*(1.03585433030833))*(f)+((a)*(9.76575358263375)+(d)*(1.03585433030833)+(h)*(6.84209765783974))*(j)=0,((b)*(18.9052758456522)+(f)*(0.834836569187845)+(j)*(9.76575358263375))*(b)+((b)*(0.834836569187845)+(f)*(0.555656932954595)+(j)*(1.03585433030833))*(f)+((b)*(9.76575358263375)+(f)*(1.03585433030833)+(j)*(6.84209765783974))*(j)=1,((c)*(18.9052758456522)+(g)*(0.834836569187845)+(k)*(9.76575358263375))*(b)+((c)*(0.834836569187845)+(g)*(0.555656932954595)+(k)*(1.03585433030833))*(f)+((c)*(9.76575358263375)+(g)*(1.03585433030833)+(k)*(6.84209765783974))*(j)=0,((a)*(18.9052758456522)+(d)*(0.834836569187845)+(h)*(9.76575358263375))*(c)+((a)*(0.834836569187845)+(d)*(0.555656932954595)+(h)*(1.03585433030833))*(g)+((a)*(9.76575358263375)+(d)*(1.03585433030833)+(h)*(6.84209765783974))*(k)=0,((b)*(18.9052758456522)+(f)*(0.834836569187845)+(j)*(9.76575358263375))*(c)+((b)*(0.834836569187845)+(f)*(0.555656932954595)+(j)*(1.03585433030833))*(g)+((b)*(9.76575358263375)+(f)*(1.03585433030833)+(j)*(6.84209765783974))*(k)=0,((c)*(18.9052758456522)+(g)*(0.834836569187845)+(k)*(9.76575358263375))*(c)+((c)*(0.834836569187845)+(g)*(0.555656932954595)+(k)*(1.03585433030833))*(g)+((c)*(9.76575358263375)+(g)*(1.03585433030833)+(k)*(6.84209765783974))*(k)=1
但是Wolfram Alpha无法识别这个输入。
由于对此有一些混淆,为了澄清问题,请在最后的注释中找到给定输入A的p。
1)特征假设A是一个对称正定矩阵,在问题中是:
P <- with(eigen(A), diag(1/sqrt(values)) %*% t(vectors))
# check
P %*% A %*% t(P)
## [1,] 1.000000e+00 -6.661338e-16 2.775558e-17
## [2,] -6.661338e-16 1.000000e+00 3.469447e-16
## [3,] 2.775558e-17 1.994932e-16 1.000000e+00
2) optim另一种方法是对6个方程进行数值求解。注意,参数化将P限制为对称的,从而将参数的数量减少到6个。一个完整的3x3=9个参数可能会导致数值问题,因为,正如评论中所指出的,如果P是一个解,那么HP对于任何正交h也是一个解。
# input is upper triangle of P incl diagonal and output is P
p2P <- function(p) {
P <- diag(3)
P[upper.tri(P, diag = TRUE)] <- p
P + t(P) - diag(diag(P))
}
# sum of squares of difference between PAP' and diag(3)
f <- function(p) {
P <- p2P(p)
sum(c(P %*% A %*% t(P) - diag(3))^2)
}
res <- optim(1:6, f, method = "BFGS")
str(res)
## List of 5
## $ par : num [1:6] -0.2605 0.2573 0.5745 -0.0776 -0.027 ...
## $ value : num 4.69e-10
## $ counts : Named int [1:2] 167 49
## ..- attr(*, "names")= chr [1:2] "function" "gradient"
## $ convergence: int 0
## $ message : NULL
# check that PAP' is diag(3)
P <- p2P(res$par)
P %*% A %*% t(P)
## [,1] [,2] [,3]
## [1,] 9.999948e-01 -8.654260e-08 8.333054e-06
## [2,] -8.654260e-08 9.999955e-01 3.266114e-07
## [3,] 8.333054e-06 3.266114e-07 9.999832e-01
3) nls我们可以交替使用nls
来解方程。我们从上面使用p2P,但我们不需要f。注意,P是对称的。
result2 <- nls(c(diag(3)) ~ c(p2P(p) %*% A %*% t(p2P(p))),
start = list(p = 1:6), algorithm = "port")
result2
## Nonlinear regression model
## model: c(diag(3)) ~ c(p2P(p) %*% A %*% t(p2P(p)))
## data: parent.frame()
## p1 p2 p3 p4 p5 p6
## 0.26049 -0.25733 -0.57448 0.07757 0.02702 0.22665
## residual sum-of-squares: 1.354e-22
##
## Algorithm "port", convergence message: absolute function convergence (6)
# check that PAP' equals diag(3)
P <- p2P(coef(result2))
P %*% A %*% t(P)
## [,1] [,2] [,3]
## [1,] 1.000000e+00 7.954436e-13 5.084821e-14
## [2,] 7.954401e-13 1.000000e+00 -1.869643e-12
## [3,] 5.073719e-14 -1.869574e-12 1.000000e+00
4) nls-2这也使用nls,但使用P的不同参数化,即对角矩阵(3个参数)乘以斜对称矩阵(3个参数)的指数。注意,斜对称矩阵的指数是正交的,因此参数化将P约束为对角线乘以正交。
library(expm)
p2P4 <- function(p) {
X <- matrix(0, 3, 3)
X[upper.tri(X)] <- p[1:3]
diag(p[4:6]) %*% expm(X - t(X))
}
res4 <- nls(c(diag(3)) ~ c(p2P4(p) %*% A %*% t(p2P4(p))), start = list(p = 1:6),
algorithm = "port")
res4
## Nonlinear regression model
## model: c(diag(3)) ~ c(p2P4(p) %*% A %*% t(p2P4(p)))
## data: parent.frame()
## p1 p2 p3 p4 p5 p6
## 3.3142 1.9743 1.6253 0.6500 0.1963 0.3663
## residual sum-of-squares: 2.863e-30
##
## Algorithm "port", convergence message: X-convergence (3)
# check that PAP' equals diag(3)
P <- p2P4(coef(res4))
P %*% A %*% t(P)
## [,1] [,2] [,3]
## [1,] 1.000000e+00 9.367507e-16 -2.827599e-16
## [2,] 9.159340e-16 1.000000e+00 -6.661338e-16
## [3,] -2.289835e-16 -6.106227e-16 1.000000e+00
注意
我们用这个来表示A:
Lines <- "
10.166746 -2.619763 -6.717475
-2.619763 3.291888 3.052898
-6.717475 3.052898 22.313351"
A <- as.matrix(read.table(text = Lines))
正如你所注意到的,如果你把A分解成代数的平方根
A = L * L^T
那么问题就变成了:
(P*L) * (P*L)^T = Id
这意味着P*L
是一个酉矩阵https://en.wikipedia.org/wiki/Unitary_matrix。
解由任意酉矩阵U, (U * U^T=Id
)
P=inv(L)*U
有无穷多个解。
您明确地声明您不希望U=Id
具有平凡的解决方案P=inv(L)
,但您没有说明原因,并且您没有提供对p的额外约束…
注意@G的好答案中的第一个解。Grothendieck正是这样:用特征向量V
将A化简成对角线形式D(特征值)inv(V)*A*V = D
D = sqrt(D)*sqrt(D)
P = inv(sqrt(D))*V