Java 位图法排序的使用方法

2022-10-20,,,

java jdk里面容器类的排序算法使用的主要是插入排序和归并排序,可能不同版本的实现有所不同,关键代码如下:
复制代码 代码如下:
/**
     * performs a sort on the section of the array between the given indices
     * using a mergesort with exponential search algorithm (in which the merge
     * is performed by exponential search). n*log(n) performance is guaranteed
     * and in the average case it will be faster then any mergesort in which the
     * merge is performed by linear search.
     *
     * @param in -
     *            the array for sorting.
     * @param out -
     *            the result, sorted array.
     * @param start
     *            the start index
     * @param end
     *            the end index + 1
     */
    @suppresswarnings("unchecked")
    private static void mergesort(object[] in, object[] out, int start,
            int end) {
        int len = end - start;
        // use insertion sort for small arrays
        if (len <= simple_length) {
            for (int i = start + 1; i < end; i++) {
                comparable<object> current = (comparable<object>) out[i];
                object prev = out[i - 1];
                if (current.compareto(prev) < 0) {
                    int j = i;
                    do {
                        out[j--] = prev;
                    } while (j > start
                            && current.compareto(prev = out[j - 1]) < 0);
                    out[j] = current;
                }
            }
            return;
        }
        int med = (end + start) >>> 1;
        mergesort(out, in, start, med);
        mergesort(out, in, med, end);

        // merging

        // if arrays are already sorted - no merge
        if (((comparable<object>) in[med - 1]).compareto(in[med]) <= 0) {
            system.arraycopy(in, start, out, start, len);
            return;
        }
        int r = med, i = start;

        // use merging with exponential search
        do {
            comparable<object> fromval = (comparable<object>) in[start];
            comparable<object> rval = (comparable<object>) in[r];
            if (fromval.compareto(rval) <= 0) {
                int l_1 = find(in, rval, -1, start + 1, med - 1);
                int tocopy = l_1 - start + 1;
                system.arraycopy(in, start, out, i, tocopy);
                i += tocopy;
                out[i++] = rval;
                r++;
                start = l_1 + 1;
            } else {
                int r_1 = find(in, fromval, 0, r + 1, end - 1);
                int tocopy = r_1 - r + 1;
                system.arraycopy(in, r, out, i, tocopy);
                i += tocopy;
                out[i++] = fromval;
                start++;
                r = r_1 + 1;
            }
        } while ((end - r) > 0 && (med - start) > 0);

        // copy rest of array
        if ((end - r) <= 0) {
            system.arraycopy(in, start, out, i, med - start);
        } else {
            system.arraycopy(in, r, out, i, end - r);
        }
    }

看到编程珠玑上有一个很有趣的排序算法-位图法其思想是用1位来表示[0~n-1]中的整数是否存在。1表示存在,0表示不存在。即将正整数映射到bit集合中,每一个bit代表其映射的正整数是否存在。

比如{1,2,3,5,8,13}使用下列集合表示:

  0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0

伪代码如下:

for (i  in  [0~n-1])  bit[i] = 0;
for(i  in [0~n-1])
  if (i in input file)     
    bit[i] = 1

for(i  in [0~n-1])
  if(bit[i] == 1) 
    output i

用java 代码尝试下,效率果然不错:
复制代码 代码如下:
public class javauniquesort {
    public static int[] temp = new int[1000001];
    public static list<integer> templist = new arraylist<integer>();
    public static int count;

    public static void main(final string[] args) {
        list<integer> firstnum = new arraylist<integer>();
        list<integer> secondnum = new arraylist<integer>();

        for (int i = 1; i <= 1000000; i++) {
            firstnum.add(i);
            secondnum.add(i);
        }

        collections.shuffle(firstnum);
        collections.shuffle(secondnum);

        getstarttime();
        collections.sort(firstnum);
        getendtime("java sort run time  ");

        getstarttime();
        secondnum = uniquesort(secondnum);
        getendtime("uniquesort run time ");

    }

    public static list<integer> uniquesort(final list<integer> uniquelist) {
        javauniquesort.templist.clear();
        for (int i = 0; i < javauniquesort.temp.length; i++) {
            javauniquesort.temp[i] = 0;
        }
        for (int i = 0; i < uniquelist.size(); i++) {
            javauniquesort.temp[uniquelist.get(i)] = 1;
        }
        for (int i = 0; i < javauniquesort.temp.length; i++) {
            if (javauniquesort.temp[i] == 1) {
                javauniquesort.templist.add(i);
            }
        }

        return javauniquesort.templist;
    }

    public static void getstarttime() {
        javashuffle.start = system.nanotime();
    }

    public static void getendtime(final string s) {
        javashuffle.end = system.nanotime();
        system.out.println(s + ": " + (javashuffle.end - javashuffle.start) + "ns");
    }
}

运行时间:

java sort run time  : 1257737334ns
uniquesort run time : 170228290ns
java sort run time  : 1202749828ns
uniquesort run time : 169327770ns

如果有重复数据,可以修改下:
复制代码 代码如下:
public class javaduplicatesort {
    public static list<integer> templist = new arraylist<integer>();
    public static int count;

    public static void main(final string[] args) {
        random random = new random();
        list<integer> firstnum = new arraylist<integer>();
        list<integer> secondnum = new arraylist<integer>();

        for (int i = 1; i <= 100000; i++) {
            firstnum.add(i);
            secondnum.add(i);
            firstnum.add(random.nextint(i + 1));
            secondnum.add(random.nextint(i + 1));
        }
        collections.shuffle(firstnum);
        collections.shuffle(secondnum);

        getstarttime();
        collections.sort(firstnum);
        getendtime("java sort run time  ");

        getstarttime();
        secondnum = uniquesort(secondnum);
        getendtime("uniquesort run time ");

    }

    public static list<integer> uniquesort(final list<integer> uniquelist) {
        javaduplicatesort.templist.clear();
        int[] temp = new int[200002];
        for (int i = 0; i < temp.length; i++) {
            temp[i] = 0;
        }
        for (int i = 0; i < uniquelist.size(); i++) {
            temp[uniquelist.get(i)]++;
        }
        for (int i = 0; i < temp.length; i++) {
            for (int j = temp[i]; j > 0; j--) {
                javaduplicatesort.templist.add(i);
            }
        }

        return javaduplicatesort.templist;
    }

    public static void getstarttime() {
        javashuffle.start = system.nanotime();
    }

    public static void getendtime(final string s) {
        javashuffle.end = system.nanotime();
        system.out.println(s + ": " + (javashuffle.end - javashuffle.start) + "ns");
    }
}

这种算法还是有很明显的局限性的,比如说要知道数据中最大的数值,更重要的是数据的疏密程度,比如说最大值为1000000而要数组大小只有100,那么效率会下降的非常明显。。。。。但是,使用位图法进行排序,确实让人眼前一亮。位图法通常是用来存储数据,判断某个数据存不存在或者判断数组是否存在重复 。

《Java 位图法排序的使用方法.doc》

下载本文的Word格式文档,以方便收藏与打印。