中位数中的奇怪错误作为枢轴来查找 Kth 最大元素



终于发现了这个bug,请看第二个EDIT的
但是还有一个问题,你看,"getMedian()"方法的最后第四行,我对 medians[] 数组进行排序以找到中位数的中位数。我个人认为这打破了最坏情况的时间复杂度 = O(n),正确吗?


我尝试使用中位数作为枢轴来对数组进行分区以找到第 K 个最大元素。

但是这个错误太奇怪了:例如{8, 5, 2, 0, 9, 1},输出是:

  • k = 1,输出>> 9。
  • k = 2,输出>> 9。
  • k = 3,输出>> 5。
  • k = 4,输出>> 2。
  • k = 5,输出>> 1。
  • k = 6,输出>> 0。
  • 第二个max元素的输出是错误的,其他的很好。
public class FindKthMax_MOM {
// Find Kth max by median of medians pivot method.
// Time complexity, worst case = O(n).
// n is the num of the elem in the given array.
private static LNode <Integer> mergeSortLLForIntDataFromMinToMax (LNode <Integer> head) {
    if (head == null || head.next == null) return head;
    // Get the mid point of this linked list.
    LNode <Integer> preSlower = head;
    LNode <Integer> slower = head;
    LNode <Integer> faster = head;
    while (faster != null && faster.next != null) {
        preSlower = slower;
        slower = slower.next;
        faster = faster.next.next;
    }
    // Cut off this main linked list.
    preSlower.next = null;
    // Do recursion on the left part and the right part.
    LNode <Integer> left = mergeSortLLForIntDataFromMinToMax(head);
    LNode <Integer> right = mergeSortLLForIntDataFromMinToMax(slower);
    // Merge left part and the right part from min to max.
    LNode <Integer> currHead = new LNode <Integer> ();
    LNode <Integer> tempCurrHead = currHead;
    while (left != null && right != null) {
        if (left.data <= right.data) {
            // Add the elem of left part into the linked list.
            tempCurrHead.next = left;
            left = left.next;
        } else {
            // Add the elem of right part into the linked list.
            tempCurrHead.next = right;
            right = right.next;
        }
        tempCurrHead = tempCurrHead.next;
    }
    if (left != null) {
        // Add the remaining part of left part into the main linked list.
        tempCurrHead.next = left;
        left = left.next;
        tempCurrHead = tempCurrHead.next;
    } else if (right != null) {
        // Add the remaining part of right part into the main linked list.
        tempCurrHead.next = right;
        right = right.next;
        tempCurrHead = tempCurrHead.next;
    }
    return currHead.next;
}
private static int partition_second (int[] givenArray, int start, int end, int pivotIndex) {
    int pivot = givenArray[pivotIndex];
    int left = start;
    int right = end;
    while (left <= right) {
        while (givenArray[left] < pivot) left ++;
        while (givenArray[right] > pivot) right --;
        if (left <= right) {
            // Exchange the givenArray[left] and the givenArray[right].
            exchange(givenArray, left, right);
            left ++;
            right --;
        }
    }
    return left;
}
private static void quickSortFromMinToMax (int[] givenArray, int start, int end) {
    if (start >= end) return;
    // Generate a random num in the range[start, end].
    int rand = start + (int)(Math.random() * ((end - start) + 1));
    // Use this random num as the pivot index to partition the given array in the current scope.
    int split = partition_second (givenArray, start, end, rand);
    // Do recursion.
    quickSortFromMinToMax(givenArray, start, split - 1);
    quickSortFromMinToMax(givenArray, split, end);
}
private static int getMedianFromLL (LNode <Integer> head) {
    if (head == null) return Integer.MIN_VALUE;
    // Get the mid point of this linked list.
    LNode <Integer> slower = head;
    LNode <Integer> faster = head;
    while (faster != null && faster.next != null) {
        slower = slower.next;
        faster = faster.next.next;
    }
    return slower.data;
}
private static int getMedian (int[] givenArray, int start, int end) {
    int size = end - start + 1;
    int numOfSubSet = (float)(size) / 5 > (size / 5) ? (size / 5 + 1) : (size / 5);
    if (numOfSubSet <= 1) {
        // Sort this little array, and return its median.
        quickSortFromMinToMax(givenArray, start, end);
        return givenArray[(start + end) / 2];
    }
    // Split this entire given array into subset.
    int givenArrayIndex = start;
    LList <LList<Integer>> mainLL = new LList <LList<Integer>> ();
    for (int i = 0; i < numOfSubSet; i ++) {
        // Use the linked list to store each sub set.
        LList <Integer> subLL = new LList <Integer> ();
        // Load this subLL by the elems of givenArray.
        for (int j = 0; j < 5; j ++) {
            if (givenArrayIndex <= end) {
                subLL.addNode (givenArray[givenArrayIndex]);
                givenArrayIndex ++;
            } else break;
        }
        // Sort this linked list by merge sort.
        subLL.head = mergeSortLLForIntDataFromMinToMax(subLL.head);         
        mainLL.addNode (subLL);
    }
    // Calculate each median for each subset.
    int[] medians = new int[numOfSubSet];
    int mediansIndex = 0;
    LNode <LList<Integer>> tempSubSet = mainLL.head;
    while (tempSubSet != null) {
        medians[mediansIndex] = getMedianFromLL (tempSubSet.data.head);
        mediansIndex ++;
        tempSubSet = tempSubSet.next;
    }
    // Sort the medians array.
    quickSortFromMinToMax (medians, 0, numOfSubSet - 1);
    // Return the median of medians.
    return medians[numOfSubSet / 2];
}
private static void exchange (int[] givenArray, int firstIndex, int secondIndex) {
    int tempElem = givenArray[firstIndex];
    givenArray[firstIndex] = givenArray[secondIndex];
    givenArray[secondIndex] = tempElem;
}
private static int partition (int[] givenArray, int start, int end, int pivot) {
    int left = start;
    int right = end;
    while (left <= right) {
        while (givenArray[left] > pivot) left ++;
        while (givenArray[right] < pivot) right --;
        if (left <= right) {
            // Exchange the givenArray[left] and the givenArray[right].
            exchange(givenArray, left, right);
            left ++;
            right --;
        }
    }
    return left;
}
private static int findKthMax_MOM_Helper (int[] givenArray, int start, int end, int k) {
    if (start > end) return Integer.MIN_VALUE;
    // Get the median of the givenArray in the current scope.
    int median = getMedian (givenArray, start, end);
    // Use this median as the pivot to partition the given array in the current scope.
    int split = partition (givenArray, start, end, median);
    if (k == split) return givenArray[split - 1];
    else if (k < split) return findKthMax_MOM_Helper(givenArray, start, split - 1, k);
    else return findKthMax_MOM_Helper(givenArray, split, end, k);
}
public static int findKthMax_MOM (int[] givenArray, int k) {
    int size = givenArray.length;
    if (k < 1 || k > size) return Integer.MIN_VALUE;
    return findKthMax_MOM_Helper(givenArray, 0, size - 1, k);
}
// Main method to test.
public static void main (String[] args) {
    // Test data: {8, 5, 2, 0, 9, 1}.
    int[] givenArray = {8, 5, 2, 0, 9, 1};
    // Test finding the Kth max by median of medians as pivot method.
    System.out.println("Test finding Kth max by median of medians as pivot method, rest = " + findKthMax_MOM(givenArray, 2));
}
}

另一个问题:

getMedian (int[] givenArray, int start, int end)方法的最后一第二行,我对 median 数组进行排序,我认为此操作打破了 O(n) 的时间复杂度,正确吗?


编辑
我个人认为,该错误可能存在于两个地方:
  • 在《partition (int[] givenArray, int start, int end, int pivot)》中
  • >

[代码]

private static int partition (int[] givenArray, int start, int end, int pivot) {
    int left = start;
    int right = end;
    while (left <= right) {
        while (givenArray[left] > pivot) left ++;
        while (givenArray[right] < pivot) right --;
        if (left <= right) {
            // Exchange the givenArray[left] and the givenArray[right].
            exchange(givenArray, left, right);
            left ++;
            right --;
        }
    }
    return left;
}
  • 在《findKthMax_MOM_Helper (int[] givenArray, int start, int end, int k)》中
  • >

[代码]

private static int findKthMax_MOM_Helper (int[] givenArray, int start, int end, int k) {
    if (start > end) return Integer.MIN_VALUE;
    // Get the median of the givenArray in the current scope.
    int median = getMedian (givenArray, start, end);
    // Use this median as the pivot to partition the given array in the current scope.
    int split = partition (givenArray, start, end, median);
    if (k == split) return givenArray[split - 1];
    else if (k < split) return findKthMax_MOM_Helper(givenArray, start, split - 1, k);
    else return findKthMax_MOM_Helper(givenArray, split, end, k);
}


第二次编辑
最后我发现了这个错误,我的代码的错误部分是"getMedian()"方法,看看该方法的最后三句话。
[代码]
private static int getMedian (int[] givenArray, int start, int end) {
    int size = end - start + 1;
    int numOfSubSet = (float)(size) / 5 > (size / 5) ? (size / 5 + 1) : (size / 5);
    if (numOfSubSet <= 1) {
        // Sort this little array, and return its median.
        quickSortFromMinToMax(givenArray, start, end);
        return givenArray[(start + end) / 2];
    }
    // Split this entire given array into subset.
    int givenArrayIndex = start;
    LList <LList<Integer>> mainLL = new LList <LList<Integer>> ();
    for (int i = 0; i < numOfSubSet; i ++) {
        // Use the linked list to store each sub set.
        LList <Integer> subLL = new LList <Integer> ();
        // Load this subLL by the elems of givenArray.
        for (int j = 0; j < 5; j ++) {
            if (givenArrayIndex <= end) {
                subLL.addNode (givenArray[givenArrayIndex]);
                givenArrayIndex ++;
            } else break;
        }
        // Sort this linked list by merge sort.
        subLL.head = mergeSortLLForIntDataFromMinToMax(subLL.head);
        mainLL.addNode (subLL);
    }
    // Calculate each median for each subset.
    int[] medians = new int[numOfSubSet];
    int mediansIndex = 0;
    LNode <LList<Integer>> tempSubSet = mainLL.head;
    while (tempSubSet != null) {
        medians[mediansIndex] = getMedianFromLL (tempSubSet.data.head);
        mediansIndex ++;
        tempSubSet = tempSubSet.next;
    }
    // Sort the medians array.
    quickSortFromMinToMax (medians, 0, numOfSubSet - 1);
    // Return the median of medians.
    int median = medians[0];
    if (numOfSubSet > 2) median = numOfSubSet % 2 == 0 ? medians[numOfSubSet / 2 - 1] : medians[numOfSubSet / 2];
    return median;
}

抱歉,但我认为错误出在您的LList中。我已经尝试过这种快速而肮脏的LList实现,您的代码工作正常。

public class LList<T> {
    public LNode<T> head;
    public void addNode(T i) {
        LNode<T> lNode = new LNode<>();
        lNode.data = i;
        lNode.next = head;
        head = lNode;
    }
    //only for testing
    @Override
    public String toString() {
        LNode<T> node = head;
        StringBuilder sb = new StringBuilder();
        for (;node!=null;node=node.next){
            sb.append(node.data.toString());
            sb.append(",");
        }
        return sb.toString();
    }
}
public class LNode<T> {
    LNode<T> next;
    T data;
}

给予

System.out.println("Test finding ..., rest = " + findKthMax_MOM(givenArray, 1));
System.out.println("Test finding ..., rest = " + findKthMax_MOM(givenArray, 2));
System.out.println("Test finding ..., rest = " + findKthMax_MOM(givenArray, 3));
System.out.println("Test finding ..., rest = " + findKthMax_MOM(givenArray, 4));
System.out.println("Test finding ..., rest = " + findKthMax_MOM(givenArray, 5));
System.out.println("Test finding ..., rest = " + findKthMax_MOM(givenArray, 6));

预期成果:

Test finding ..., rest = 9
Test finding ..., rest = 8
Test finding ..., rest = 5
Test finding ..., rest = 2
Test finding ..., rest = 1
Test finding ..., rest = 0 

相关内容

  • 没有找到相关文章

最新更新