贪婪算法在解决哈夫曼树及编码问题中的应用
哈夫曼編碼,是一種可變字長編碼(VLC)的高效算法。該算法是Huffman于1952年提出一種編碼方法,該方法完全依據字符出現概率來構造異字頭的平均長度最短的碼字,有時稱之為最佳編碼。
相比定長編碼來說,這種編碼實現的壓縮率(衡量壓縮算法效率的重要指標)非常高,也就是說,哈夫曼編碼比定長編碼占用更少的存儲空間。
假設我們要對某個字母表創建一套二進制前綴碼,那么我們一般都會講字母表中的字符與二進制的葉子聯系起來,樹中所有的左向邊都為0,右向邊都為1.可以通過記錄根到字符葉子的簡單路徑上的標記來獲得一個字符的代碼字。這樣任何一棵這樣的樹都可以生成一套前綴碼。但是我們都知道即使在英文單詞中,每個字母出現的概率都是不同的,如果僅僅是放到二叉樹中,對于一些高頻字符很可能需要更長的代碼串來表示,這是非常不友好的。
一個方法就是通過根據字符出現的概率,盡可能將短位串分配給高頻字符,長位串分配給低頻字符。這里就用到了貪婪思想。
思路:
1. 初始化n個單節點的數,表上字母表中的字符,并將其概率記錄,用來表示權重。
2. 找出兩顆權重最小的樹(對于權重都相同的樹,任選其一),將它們作為新樹中的左右子樹,并將權值之和記錄到新的樹根中。迭代這一步操作,直到剩下一棵單獨的樹。
以下面的例子來描述哈夫曼樹的構造過程:
| 字符 | A | B | C | D | _ |
| 出現概率 | 0.35 | 0.1 | 0.2 | 0.2 | 0.15 |
由上面的過程我們得到了下面的代碼字:
| 字符 | A | B | C | D | _ |
| 出現概率 | 0.35 | 0.1 | 0.2 | 0.2 | 0.15 |
| 代碼字 | 11 | 100 | 00 | 01 | 101 |
Input:
5
A B C D _
35 10 20 20 15
Output:
A : 11
B : 100
C : 00
D : 01
_ : 101
完整代碼如下:
import java.util.Scanner; public class Main {//建立數的節點類static class Node{int weight;//頻數int parent;int leftChild;int rightChild;public Node(int weight, int parent, int leftChild, int rightChild){this.weight = weight;this.parent = parent;this.leftChild = leftChild;this.rightChild = rightChild;}void setWeight(int weight){this.weight = weight;}void setParent(int parent){this.parent = parent;}void setLeftChild(int leftChild){this.leftChild = leftChild;}void setRightChild(int rightChild){this.rightChild = rightChild;}int getWeight(){return weight;}int getParent(){return parent;}int getLeftChild(){return leftChild;}int getRightChild(){return rightChild;}}//新建哈夫曼編碼static class NodeCode {String character;String code;NodeCode(String character, String code) {this.character = character;this.code = code;}NodeCode(String code) {this.code = code;}void setCharacter(String character) {this.character = character;}void setCode(String code) {this.code = code;}String getCharacter() {return character;}String getCode() {return code;}}//初始化一個哈弗曼樹public static void initHuffmanTree(Node[] huffmanTree, int m){for(int i = 0; i < m; i++){huffmanTree[i] = new Node(0, -1, -1, -1);}}//初始化編碼public static void initHuffmanCode(NodeCode[] huffmanCode, int n){for(int i = 0; i < n; i++){huffmanCode[i] = new NodeCode("","");}}//獲取huffmanCode的符號public static void getHuffmanCode(NodeCode[] huffmanCode, int n){Scanner input = new Scanner(System.in);for(int i = 0; i < n; i++){String temp = input.next();huffmanCode[i] = new NodeCode(temp,"");}}//獲取頻率public static void getHuffmanWeight(Node[] huffmanTree , int n){Scanner input = new Scanner(System.in);for(int i = 0; i < n;i ++){int temp = input.nextInt();huffmanTree[i] = new Node(temp, -1, -1, -1);}}//選取兩個較小的結點public static int[] selectMin(Node[] huffmanTree ,int n) {int min[] = new int[2];class TempNode {int newWeight;//存儲權int place;//存儲該結點所在的位置TempNode(int newWeight, int place){this.newWeight = newWeight;this.place = place;}void setNewWeight(int newWeight){this.newWeight = newWeight;}void setPlace(int place){this.place = place;}int getNewWeight(){return newWeight;}int getPlace(){return place;}}TempNode[] tempTree = new TempNode[n];//將huffmanTree中沒有雙親的結點存儲到tempTree中int i=0,j=0;for(i = 0; i < n; i++) {if(huffmanTree[i].getParent() == -1 && huffmanTree[i].getWeight()!=0) {tempTree[j] = new TempNode(huffmanTree[i].getWeight(),i);j++;}}int m1,m2;m1 = m2 = 0;for(i = 0; i < j; i++) {if(tempTree[i].getNewWeight() < tempTree[m1].getNewWeight())//此處不讓取到相等,是因為結點中有相同權值的時候,m1取最前的m1 = i;}for(i = 0; i < j; i++) {if(m1 == m2)m2++;//當m1在第一個位置的時候,m2向后移一位if(tempTree[i].getNewWeight() <= tempTree[m2].getNewWeight() && i != m1)//此處取到相等,是讓在結點中有相同的權值的時候,//m2取最后的那個。m2 = i;}min[0] = tempTree[m1].getPlace();min[1] = tempTree[m2].getPlace();return min;}//創建哈弗曼樹public static void createHaffmanTree(Node[] huffmanTree,int n){if(n <= 1)System.out.println("Parameter Error!");int m = 2 * n - 1;//initHuffmanTree(huffmanTree,m);for(int i = n; i < m; i++) {int[] min = selectMin(huffmanTree, i);int min1 = min[0];int min2 = min[1];huffmanTree[min1].setParent(i);huffmanTree[min2].setParent(i);huffmanTree[i].setLeftChild(min1);huffmanTree[i].setRightChild(min2);huffmanTree[i].setWeight(huffmanTree[min1].getWeight() + huffmanTree[min2].getWeight());}}//創建哈夫曼編碼public static void createHaffmanCode(Node[] huffmanTree,NodeCode[] huffmanCode,int n){Scanner input = new Scanner(System.in);char[] code = new char[10];int start;int c;int parent;int temp;code[n-1] = '0';for(int i = 0; i < n; i++){StringBuffer stringBuffer = new StringBuffer();start = n-1;c = i;while((parent=huffmanTree[c].getParent()) >= 0){start--;code[start] = ((huffmanTree[parent].getLeftChild() == c) ? '0' : '1');c = parent;}for(;start < n-1; start++){stringBuffer.append(code[start]);}huffmanCode[i].setCode(stringBuffer.toString());}}//輸出public static void ouputHaffmanCode(NodeCode[] huffmanCode,int n){for(int i = 0; i < n; i++){System.out.println(huffmanCode[i].getCharacter() + " : " + huffmanCode[i].getCode());}}//主函數public static void main(String[] args){Scanner input = new Scanner(System.in);int n;int m;n = input.nextInt();m = 2*n-1;Node[] huffmanTree = new Node[m];NodeCode[] huffmanCode = new NodeCode[n];//初始化initHuffmanTree(huffmanTree, m);initHuffmanCode(huffmanCode, n);//獲取符號getHuffmanCode(huffmanCode, n);//獲取概率getHuffmanWeight(huffmanTree, n);//創建哈夫曼樹createHaffmanTree(huffmanTree, n);//創建哈夫曼編碼createHaffmanCode(huffmanTree, huffmanCode, n);//輸出ouputHaffmanCode(huffmanCode, n);} }注意:輸出哈夫曼樹和輸出哈夫曼編碼時不同的操作。總結
以上是生活随笔為你收集整理的贪婪算法在解决哈夫曼树及编码问题中的应用的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: c++教程下载
- 下一篇: 自定义枚举typeHandler