解释装配函数行为的建议



我对汇编相对陌生,遇到了一个让我困惑的函数。在调试过程中,我发现$rdi寄存器最初设置为0x55555575a1b8。从本质上讲,我希望返回任何东西的值,但不是负值或零。最初,将$esi设置为0x5575a1b8似乎会返回0x1。事实上,它返回了-1。进一步的检查表明,有几个两个递归函数调用,每个调用都将返回值保存到寄存器中,以便稍后进行比较:

r12 = Find(*(rdi + 0x8), rsi);
rax = Find(*(rdi + 0x10), rsi);

在这之后,我迷失在代码中。。。

0000000000001b62 <Find>:
1b62:   48 85 ff                test   %rdi,%rdi
1b65:   74 3c                   je     1ba3 <Find+0x41>
1b67:   39 37                   cmp    %esi,(%rdi)
1b69:   74 3e                   je     1ba9 <Find+0x47>
1b6b:   41 54                   push   %r12
1b6d:   55                      push   %rbp
1b6e:   53                      push   %rbx
1b6f:   89 f5                   mov    %esi,%ebp
1b71:   48 89 fb                mov    %rdi,%rbx
1b74:   48 8b 7f 08             mov    0x8(%rdi),%rdi
1b78:   e8 e5 ff ff ff          callq  1b62 <Find>
1b7d:   41 89 c4                mov    %eax,%r12d
1b80:   48 8b 7b 10             mov    0x10(%rbx),%rdi
1b84:   89 ee                   mov    %ebp,%esi
1b86:   e8 d7 ff ff ff          callq  1b62 <Find>
1b8b:   45 85 e4                test   %r12d,%r12d
1b8e:   7e 09                   jle    1b99 <Find+0x37>
1b90:   43 8d 04 24             lea    (%r12,%r12,1),%eax
1b94:   5b                      pop    %rbx
1b95:   5d                      pop    %rbp
1b96:   41 5c                   pop    %r12
1b98:   c3                      retq   
1b99:   85 c0                   test   %eax,%eax
1b9b:   7e 12                   jle    1baf <Find+0x4d>
1b9d:   8d 44 00 01             lea    0x1(%rax,%rax,1),%eax
1ba1:   eb f1                   jmp    1b94 <Find+0x32>
1ba3:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1ba8:   c3                      retq   
1ba9:   b8 01 00 00 00          mov    $0x1,%eax
1bae:   c3                      retq   
1baf:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1bb4:   eb de                   jmp    1b94 <Find+0x32>

对该方法行为的任何深入了解都将不胜感激。

0000000000001b62 <Find>:
; if (null == rdi) return -1;
1b62:   48 85 ff                test   %rdi,%rdi
1b65:   74 3c                   je     1ba3 <Find+0x41>
; if (esi == *rdi) return 1;
1b67:   39 37                   cmp    %esi,(%rdi)
1b69:   74 3e                   je     1ba9 <Find+0x47>
; preserving r12, rbp, rbx
1b6b:   41 54                   push   %r12
1b6d:   55                      push   %rbp
1b6e:   53                      push   %rbx
; r12d = Find( *(rdi+8), esi );
1b6f:   89 f5                   mov    %esi,%ebp
1b71:   48 89 fb                mov    %rdi,%rbx
1b74:   48 8b 7f 08             mov    0x8(%rdi),%rdi
1b78:   e8 e5 ff ff ff          callq  1b62 <Find>
1b7d:   41 89 c4                mov    %eax,%r12d
; eax = Find( *(rdi_orig+16), esi_orig)
1b80:   48 8b 7b 10             mov    0x10(%rbx),%rdi
1b84:   89 ee                   mov    %ebp,%esi
1b86:   e8 d7 ff ff ff          callq  1b62 <Find>
; if (r12d <= 0 && eax <= 0) return -1; (with restoring regs)
; if (r12d <= 0 && 0 < eax) return 2*eax + 1; (with restoring regs)
1b8b:   45 85 e4                test   %r12d,%r12d
1b8e:   7e 09                   jle    1b99 <Find+0x37>
; return 2*r12d; with restoring regs
1b90:   43 8d 04 24             lea    (%r12,%r12,1),%eax
1b94:   5b                      pop    %rbx
1b95:   5d                      pop    %rbp
1b96:   41 5c                   pop    %r12
1b98:   c3                      retq   
1b99:   85 c0                   test   %eax,%eax
1b9b:   7e 12                   jle    1baf <Find+0x4d>
1b9d:   8d 44 00 01             lea    0x1(%rax,%rax,1),%eax
1ba1:   eb f1                   jmp    1b94 <Find+0x32>
1ba3:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1ba8:   c3                      retq   
1ba9:   b8 01 00 00 00          mov    $0x1,%eax
1bae:   c3                      retq   
1baf:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1bb4:   eb de                   jmp    1b94 <Find+0x32>

因此,在类似C的代码中,我认为(没有验证或调试,甚至只是编译):

struct node_s {
int32_t value;
int32_t _padding; // left starts at offset 8
node_s* left;
node_s* right;
};
int32_t Find(node_s* node, int32_t i) {
if (nullptr == node) return -1;
if (i == node->value) return 1;
int32_t f1 = Find( node->left, i);
int32_t f2 = Find( node->right, i);
if (0 < f1) return 2 * f1;
if (0 < f2) return 2 * f2 + 1;
return -1;
}

在我看来,它是一种未优化的方式来返回二进制树中特定值的位置。

假设你有这样的树:

n0: {10, n1, n2}
n1: {11, n3, n4}
n2: {12, n5, n6}
n3: {13, nullptr, n7}
n4: {14, nullptr, nullptr}
n5: {15, nullptr, n8}
n6: {16, nullptr, nullptr}
n7: {17, nullptr, nullptr}
n8: {18, nullptr, nullptr}

10
/      
11            12
/            /    
13      14    15      16
             
17            18

那么我猜:

Find(n0, 99) = -1
Find(n0, 10) = 1
Find(n0, 11) = 2
Find(n0, 12) = 3
Find(n1, 11) = 1
Find(n0, 17) = 12
Find(n1, 17) = 6
Find(n3, 17) = 3
Find(n7, 17) = 1
...

或者当从n0节点搜索特定值时,返回值将是(我希望我没有做错):

1
/      
2             3
/            /    
4       6     5       7
             
12             13

因此,它将整个"floor"(链接深度)返回为某个范围ii+f_n-1(f_n=BT中地板上可能的最大节点数量),其中范围的前半部分是左节点,范围的后半部分是右节点。

现在,如果您在机器中有这些代码,您可以尝试重新创建这样的树结构作为输入,如果我猜对了,请告诉我。:)

编辑:没有。底部应该是12、14,而不是12、13,所以顺序比漂亮的左/右分割更棘手。。。太累了,想不出该怎么命名。当你有从根到值的路径时,看起来像是某种字典排序(但排序是向后的,就像反向字符串,或者从值到顶部的路径):

1 : ""
2 : L
3 : R
4 : LL
5 : RL
6 : LR
7 : RR
8 : LLL
9 : RLL
10: LRL
11: RRL
12: LLR
13: RLR
14: LRR
15: RRR
16: LLLL
17: RLLL
18: LRLL
...

最新更新