Java 字符串词频统计实例代码

2022-10-20

复制代码 代码如下:
package com.gpdi.action;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

public class wordsstatistics {

    class obj {
        int count ;
        obj(int count){
            this.count = count;
        }
    }

    public list<wordcount> statistics(string word) {
        list<wordcount> rs = new arraylist<wordcount>();
        map <string,obj> map = new hashmap<string,obj>();

        if(word == null ) {
            return null;
        }
        word = word.tolowercase();
        word = word.replaceall("'s", "");
        word = word.replaceall(",", "");
        word = word.replaceall("-", "");
        word = word.replaceall("\\.", "");
        word = word.replaceall("'", "");
        word = word.replaceall(":", "");
        word = word.replaceall("!", "");
        word = word.replaceall("\n", "");

        string [] wordarray = word.split(" ");
        for(string simpleword : wordarray) {
            simpleword = simpleword.trim(); 
            if (simpleword != null && !simpleword.equalsignorecase("")) {
                obj cnt = map.get(simpleword);
                if ( cnt!= null ) {
                    cnt.count++;
                }else {
                    map.put(simpleword, new obj(1));
                }
            }
        }

        for(string key : map.keyset()) {
            wordcount wd = new wordcount(key,map.get(key).count);
            rs.add(wd);
        }

        collections.sort(rs, new java.util.comparator<wordcount>(){
            @override
            public int compare(wordcount o1, wordcount o2) {
                int result = 0 ;
                if (o1.getcount() > o2.getcount() ) {
                    result = -1;
                }else if (o1.getcount() < o2.getcount()) {
                    result = 1;
                }else {
                    int strrs = o1.getword().comparetoignorecase(o2.getword());
                    if ( strrs > 0 ) {
                        result = 1;
                    }else {
                        result = -1 ;
                    }
                }
                return result;
            }

        });
        return rs;
    }

     
    public static void main(string args[]) {
        string word = "pinterest is might be aa ab aa ab marketer's dream  - ths site is largely used to curate products " ;
        wordsstatistics s = new wordsstatistics();
        list<wordcount> rs = s.statistics(word);
        for(wordcount word1 : rs) {
            system.out.println(word1.getword()+"*"+word1.getcount());
        }
    }

}

《java 字符串词频统计实例代码.doc》

下载本文的Word格式文档,以方便收藏与打印。