我写了一个程序,可以查找长度为 K 的递增子序列的总数。
该程序用两种方式实现:
-
朴素递归方式 increase_subseq_k()
-
自上而下的 DP 方式 increase_subseq_k_top_down()
代码:
#include <chrono>
#include <iostream>
#include <vector>
using namespace std;
using namespace std::chrono;
/**
 * Counts the strictly increasing subsequences of length `k` whose first
 * element is `v[idx]`, by plain (non-memoized) recursion.
 *
 * @param v    Input sequence. Taken by const reference so the vector is not
 *             copied on every recursive call.
 * @param k    Required subsequence length. Any k <= 0 yields 0.
 * @param idx  Index of the element the subsequence must start with.
 * @return     Number of such subsequences. The count is accumulated in an
 *             `int`, so it may overflow for long inputs.
 */
int helper(const std::vector<int>& v, int k, int idx) {
  // No subsequence has a non-positive length; returning here also avoids
  // walking the whole recursion tree for negative k.
  if (k <= 0) return 0;
  // A single element is itself an increasing subsequence of length 1.
  if (k == 1) return 1;

  // Signed bound so the loop does not compare int against size_t.
  const int n = static_cast<int>(v.size());
  int count = 0;
  for (int i = idx + 1; i < n; ++i) {
    // Extend only with strictly larger elements that appear later.
    if (v[i] > v[idx]) {
      count += helper(v, k - 1, i);
    }
  }
  return count;
}
int increase_subseq_k(vector<int> v, int k){
int count=0;
for(int i=0; i<v.size(); i++)
{
count+=helper(v,k,i);
}
return count;
}
/**
 * Memoized (top-down DP) count of the strictly increasing subsequences of
 * length `k` whose first element is `v[idx]`.
 *
 * @param v    Input sequence. Taken by const reference so the vector is not
 *             copied on every recursive call.
 * @param k    Required subsequence length. Any k <= 0 yields 0.
 * @param idx  Index of the element the subsequence must start with.
 * @param dp   Memo table indexed as dp[k][idx]; -1 marks "not computed yet".
 *             Must have at least k+1 rows of v.size() entries when k >= 0.
 * @return     Number of such subsequences. The count is an `int` and may
 *             overflow for long inputs.
 */
int helper_top_down(const std::vector<int>& v, int k, int idx,
                    std::vector<std::vector<int>>& dp) {
  // The table has no row for a negative k; answer directly instead of
  // indexing out of bounds.
  if (k < 0) return 0;
  // Base cases: length 0 -> 0 subsequences, length 1 -> the element itself.
  if (k == 0) {
    dp[k][idx] = 0;
    return 0;
  }
  if (k == 1) {
    dp[k][idx] = 1;
    return 1;
  }
  // Reuse a previously computed state.
  if (dp[k][idx] != -1) return dp[k][idx];

  // Signed bound so the loop does not compare int against size_t.
  const int n = static_cast<int>(v.size());
  int count = 0;
  for (int i = idx + 1; i < n; ++i) {
    // Extend only with strictly larger elements that appear later.
    if (v[i] > v[idx]) {
      count += helper_top_down(v, k - 1, i, dp);
    }
  }
  dp[k][idx] = count;
  return count;
}
int increase_subseq_k_top_down(vector<int> v, int k){
vector<vector<int>> dp(k+1, vector<int>(v.size(), -1));
int count=0;
for(int i=0; i<v.size(); i++)
{
count+=helper_top_down(v,k,i, dp);
}
return count;
}
/**
 * Runs both implementations once on a fixed input and prints, for each, the
 * result followed by the elapsed time in microseconds.
 *
 * Only the function call is inside each timed region; printing happens after
 * the clock is stopped so stream output and flushing are not measured.
 * A single run on such a small input is still noisy and gives only a rough
 * indication.
 */
int main()
{
  const std::vector<int> v = {12, 8, 11, 13, 10, 15, 14, 16, 20};

  // Naive recursion. steady_clock is monotonic, which suits interval timing.
  const auto t1 = std::chrono::steady_clock::now();
  const int naive_result = increase_subseq_k(v, 4);
  const auto t2 = std::chrono::steady_clock::now();
  const auto duration =
      std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
  std::cout << naive_result << '\n';
  std::cout << duration << '\n';

  // Top-down DP.
  const auto t3 = std::chrono::steady_clock::now();
  const int top_down_result = increase_subseq_k_top_down(v, 4);
  const auto t4 = std::chrono::steady_clock::now();
  const auto duration2 =
      std::chrono::duration_cast<std::chrono::microseconds>(t4 - t3).count();
  std::cout << top_down_result << '\n';
  std::cout << duration2 << '\n';
}
我的问题是:我正在尝试计算 2 种不同方法的执行时间,但我得到的数字非常相似,这意味着自上而下的 DP 方法实际上根本没有改善算法的运行时间。
任何见解将不胜感激!谢谢
- 你没有提到你得到的时间。我在 2018 款 MacBook Pro(i7 3.1 GHz)上对其进行了测试,递归实现的时间约为46毫秒,而动态规划实现的时间约为13毫秒。但我将在下面解释为什么应该丢弃这些值。
- 正如 @Matt Timmermans 在评论中指出的那样,只用这样小的输入对一个小函数测量一次,无法得到可靠的时间。我建议你改用微基准测试框架。下面是我使用 Google Benchmark 的实现。
- 您测量的内容包含了 `std::cout << ... << std::endl;`。其中 `std::endl` 隐含了非常慢的 `std::flush`。
复杂性
根据下面的拟合结果,您的两个解决方案的复杂度分别是:自上而下的 DP 版本为 O(N^2),朴素递归版本为 O(N^3),其中 N 等于代码中的 v.size()。
复杂度还依赖于 K,即函数的第二个参数。(这方面的数字并不令人信服,也许是 log K 和 K log K,但仅适用于 N 固定为 30 的情况。)
复杂度计算的代码可以在最后找到。这是结果表。查找BM_Generic<increase_subseq_k>_BigO 289.38 N^3 288.28 N^3
和BM_Generic<increase_subseq_k_top_down>_BigO 6.20 N^2 6.20 N^2
行:
---------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------------------
BM_Generic<increase_subseq_k>/5/5/42 40 ns 40 ns 13791473
BM_Generic<increase_subseq_k>/10/5/42 267 ns 266 ns 2481574
BM_Generic<increase_subseq_k>/20/5/42 2621 ns 2613 ns 257017
BM_Generic<increase_subseq_k>/40/5/42 61676 ns 61502 ns 11720
BM_Generic<increase_subseq_k>/80/5/42 1424755 ns 1420357 ns 502
BM_Generic<increase_subseq_k>/160/5/42 49581330 ns 49290286 ns 14
BM_Generic<increase_subseq_k>/320/5/42 2023321331 ns 2022037000 ns 1
BM_Generic<increase_subseq_k>/640/5/42 76810469575 ns 76516177000 ns 1
BM_Generic<increase_subseq_k>_BigO 289.38 N^3 288.28 N^3
BM_Generic<increase_subseq_k>_RMS 27 % 27 %
BM_Generic<increase_subseq_k_top_down>/5/5/42 839 ns 839 ns 756855
BM_Generic<increase_subseq_k_top_down>/10/5/42 1043 ns 1042 ns 663193
BM_Generic<increase_subseq_k_top_down>/20/5/42 1795 ns 1794 ns 386905
BM_Generic<increase_subseq_k_top_down>/40/5/42 5996 ns 5995 ns 119031
BM_Generic<increase_subseq_k_top_down>/80/5/42 31001 ns 30993 ns 22078
BM_Generic<increase_subseq_k_top_down>/160/5/42 150695 ns 150350 ns 4748
BM_Generic<increase_subseq_k_top_down>/320/5/42 631402 ns 630742 ns 1155
BM_Generic<increase_subseq_k_top_down>/640/5/42 2541040 ns 2540148 ns 277
BM_Generic<increase_subseq_k_top_down>_BigO 6.20 N^2 6.20 N^2
BM_Generic<increase_subseq_k_top_down>_RMS 1 % 1 %
下面关于我的解决方案的说明
- 基准测试适用于固定输入。随着输入大小的增加,您可能关心性能。我还没有实现这个。以下是如何执行此操作的文档:https://github.com/google/benchmark 。请参阅下面的更新!
- 带有
std::cout
的代码打印到命令行,您需要使用grep BM
或类似方式对其进行过滤,以仅获取所需的输出。
基准测试结果
2018-07-03 20:50:35
Running ./benchmark_main
Run on (8 X 3100 MHz CPU s)
CPU Caches:
L1 Data 32K (x4)
L1 Instruction 32K (x4)
L2 Unified 262K (x4)
L3 Unified 8388K (x1)
------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------
BM_Subseq 15086 ns 15079 ns 40718
BM_SubseqTopDown 9198 ns 9196 ns 70722
BM_SubseqNoIO 11782 ns 11774 ns 55523
BM_SubseqTopDownNoIO 6391 ns 6384 ns 108056
基准测试代码
#include <benchmark/benchmark.h>
#include "subsequence.h"
/// Benchmarks the naive recursive counter on a fixed 9-element input,
/// deliberately including the console output (with the std::endl flush) in
/// the measured loop.
static void BM_Subseq(benchmark::State &state) {
  const std::vector<int> input{12, 8, 11, 13, 10, 15, 14, 16, 20};
  for (auto _ : state) {
    const int total = increase_subseq_k(input, 4);
    std::cout << total << std::endl;
  }
}
/// Benchmarks the naive recursive counter on a fixed 9-element input without
/// any console output. DoNotOptimize keeps the otherwise unused result from
/// being optimized away.
static void BM_SubseqNoIO(benchmark::State &state) {
  std::vector<int> v = {12, 8, 11, 13, 10, 15, 14, 16, 20};
  // The framework does the timing; no manual clock read is needed here.
  for (auto _ : state) {
    benchmark::DoNotOptimize(increase_subseq_k(v, 4));
  }
}
/// Benchmarks the top-down DP counter on a fixed 9-element input,
/// deliberately including the console output (with the std::endl flush) in
/// the measured loop.
static void BM_SubseqTopDown(benchmark::State &state) {
  const std::vector<int> input{12, 8, 11, 13, 10, 15, 14, 16, 20};
  for (auto _ : state) {
    const int total = increase_subseq_k_top_down(input, 4);
    std::cout << total << std::endl;
  }
}
/// Benchmarks the top-down DP counter on a fixed 9-element input without any
/// console output. DoNotOptimize keeps the otherwise unused result from
/// being optimized away.
static void BM_SubseqTopDownNoIO(benchmark::State &state) {
  const std::vector<int> input{12, 8, 11, 13, 10, 15, 14, 16, 20};
  for (auto _ : state) {
    benchmark::DoNotOptimize(increase_subseq_k_top_down(input, 4));
  }
}
// Register the four fixed-input benchmarks with Google Benchmark: the two
// variants that print their result first, then the two "NoIO" variants.
BENCHMARK(BM_Subseq);
BENCHMARK(BM_SubseqTopDown);
BENCHMARK(BM_SubseqNoIO);
BENCHMARK(BM_SubseqTopDownNoIO);
// Supplies the program's main(), which runs the registered benchmarks.
BENCHMARK_MAIN();
更新:复杂性代码
下面是用于计算复杂性的代码。
#include <random>
#include <benchmark/benchmark.h>
#include "subsequence.h"
/**
 * Builds a pseudo-random test vector that is reproducible for a given seed.
 *
 * @param size  Number of elements to generate.
 * @param seed  Seed for the std::mt19937 engine.
 * @return      `size` integers drawn from a uniform distribution on [0, 100].
 */
std::vector<int> GetRandomVector(std::size_t size, int seed) {
  std::mt19937 engine(seed);
  // TODO: What should be the right distribution?
  std::uniform_int_distribution<> pick(0, 100);

  // Allocate all slots up front, then fill them in index order.
  std::vector<int> values(size);
  for (int &slot : values) {
    slot = pick(engine);
  }
  return values;
}
/**
 * Generic benchmark body for a counting function F, which is supplied as a
 * template argument. It is instantiated for "increase_subseq_k" and
 * "increase_subseq_k_top_down".
 *
 * Benchmark arguments, read through state.range():
 *   0 - length N of the random input vector (also reported as complexity N)
 *   1 - subsequence length passed to F as its second argument
 *   2 - seed for the random input vector
 *
 * @tparam F Function to benchmark
 */
template<int (*F)(const std::vector<int> &, int)>
static void BM_Generic(benchmark::State &state) {
  const auto length = state.range(0);
  const auto seed = state.range(2);
  // The input is generated once, outside the measured loop.
  const std::vector<int> input = GetRandomVector(length, seed);
  for (auto _ : state) {
    benchmark::DoNotOptimize(F(input, state.range(1)));
  }
  state.SetComplexityN(length);
}
/**
 * Generates custom argument triples (n, k, seed) for a benchmark. The seed
 * feeds the random vector generator and is hardcoded to 42.
 *
 * Play around with this function. The default below is the clearest example:
 * k is fixed at 5 and n doubles from 5 while it stays below 1000, which shows
 * very little variation from the O(N^2) and O(N^3) of the two
 * implementations. It is also interesting to generate different values for k
 * while varying n. However, google-benchmark only does complexity analysis on
 * one variable, so that analysis would have to be done in a different tool.
 *
 * Alternative experiment: fix n = 30 and add one argument set for every k in
 * 1..n. Just iterating over k gave log K and K log K complexity, but only
 * with n kept at 30.
 */
static void CustomArguments(benchmark::internal::Benchmark *b) {
  constexpr int kSeed = 42;
  constexpr int kSubseqLen = 5;

  // Start at k (but at least 1) and double n for each argument set.
  int n = std::max(1, kSubseqLen);
  while (n < 1000) {
    b->Args({n, kSubseqLen, kSeed});
    n *= 2;
  }
}
// Instantiate BM_Generic for each implementation, take the (n, k, seed)
// argument sets from CustomArguments, and request a complexity report (the
// _BigO and _RMS rows in the benchmark output).
BENCHMARK_TEMPLATE(BM_Generic, increase_subseq_k)->Apply(
CustomArguments)->Complexity();
BENCHMARK_TEMPLATE(BM_Generic, increase_subseq_k_top_down)->Apply(
CustomArguments)->Complexity();
// Supplies the program's main(), which runs the registered benchmarks.
BENCHMARK_MAIN();