算法系列之赫夫曼编码实战一【数据压缩、数据解压】
生活随笔
收集整理的這篇文章主要介紹了
算法系列之赫夫曼编码实战一【数据压缩、数据解压】
小編覺得挺不錯的,現(xiàn)在分享給大家,幫大家做個參考.
- 1.何謂赫夫曼編碼?
- 2.赫夫曼數(shù)據(jù)壓縮
- 3.赫夫曼數(shù)據(jù)解壓
- 4.全代碼
1.何謂赫夫曼編碼?
哈夫曼編碼是一種編碼方式,哈夫曼編碼是可變字長編碼的一種。Huffman于1952年提出一種編碼方法,該方法完全依據(jù)字符出現(xiàn)概率來構造異字頭的平均長度最短的碼字,有時稱之為最佳編碼,一般就叫做Huffman編碼(有時也稱為霍夫曼編碼)。
有關哈夫曼樹的相關文章
| 算法系列之赫夫曼樹的精解【構造流程及原理分析】 | https://blog.csdn.net/Kevinnsm/article/details/120584098?spm=1001.2014.3001.5501 |
2.赫夫曼數(shù)據(jù)壓縮
給定一串字符串,將其進行赫夫曼壓縮
1.遍歷字符串,將字符作為key,出現(xiàn)的頻率作為value存儲到map集合中;然后遍歷該map集合,將其封裝到EncryNode對象中,并將其加入到list集合中。 2.根據(jù)List集合構建哈夫曼樹 3.根據(jù)哈夫曼樹生成哈夫曼編碼表 4.將十進制字符串,轉(zhuǎn)換為二進制整數(shù)package com.zsh.algorithm.tree;import java.io.*; import java.util.*;/*** @author:Ronin* @since:2021/9/29* @email:1817937322@qq.com*/public class HuffmanEncrypt {//用來存儲轉(zhuǎn)換二進制字符串集合private static StringBuilder stringBuilder = new StringBuilder();//哈夫曼編碼表private static Map<Byte, String> huffmanCodesMap = new HashMap<>();//---------------------------------------以下是壓縮相關函數(shù)------------------------------------------///*** 赫夫曼編碼封裝方法** @param bytes* @return*/public static byte[] huffmanZip(byte[] bytes) {//1.計算字符串中每個字符出現(xiàn)的次數(shù),并字符、次數(shù)封裝到EncryptNode結點中,將其加入到list集合中List<EncryptNode> encryptNodeList = calByteNums(bytes);//outputList(encryptNodeList);//2.構建哈夫曼樹EncryptNode root = buildHuffmanTree(encryptNodeList);// preErgodic(root);//3.生成赫夫曼編碼表Map<Byte, String> huffmanCodes = getHuffmanCodes(root);//System.out.println(huffmanCodes);//4.轉(zhuǎn)換為赫夫曼編碼字節(jié)表return zip(bytes, huffmanCodes);}/*** 計算字符串中的每個字符的長度** @param bytes 待計算的字符串* @return 返回list集合*/public static List<EncryptNode> calByteNums(byte[] bytes) {Map<Byte, Integer> map = new HashMap<>();byte[] charArray = bytes;for (byte c : charArray) {if (map.containsKey(c)) {int num = map.get(c);map.replace(c, num, num + 1);} else {map.put(c, 1);}}List<EncryptNode> list = new ArrayList<>();for (Map.Entry<Byte, Integer> entry : map.entrySet()) {Byte key = entry.getKey();Integer value = entry.getValue();list.add(new EncryptNode(key, value));}return list;}/*** 構建赫夫曼樹** @param list 權重集合* @return 返回root節(jié)點*/public static EncryptNode buildHuffmanTree(List<EncryptNode> list) {while (list.size() > 1) {Collections.sort(list);EncryptNode leftNode = list.remove(0);EncryptNode rightNode = list.remove(0);EncryptNode parentNode = new EncryptNode(null, leftNode.value + rightNode.value);parentNode.left = leftNode;parentNode.right = rightNode;list.add(parentNode);}return list.remove(0);}/*** 重載getHuffmanCodes(EncryptNode node, String path, StringBuilder stringBuilder)** @param root 根節(jié)點* @return 哈夫曼編碼map集合*/public static Map<Byte, String> getHuffmanCodes(EncryptNode root) {if (root == null) {return null;} else {getHuffmanCodes(root.left, "0", stringBuilder);getHuffmanCodes(root.right, "1", stringBuilder);}return huffmanCodesMap;}/*** 赫夫曼編碼** @param node 結點* @param path 1代表右結點,0代表左結點路徑* @param stringBuilder 用于path拼接*/public static void getHuffmanCodes(EncryptNode node, String path, StringBuilder stringBuilder) {StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);stringBuilder2.append(path);if (node != null) {if (node.c == null) {getHuffmanCodes(node.left, "0", stringBuilder2);getHuffmanCodes(node.right, "1", stringBuilder2);} else {huffmanCodesMap.put(node.c, stringBuilder2.toString());}}}/*** @param bytes 待編碼字節(jié)數(shù)組* @param entry 赫夫曼編碼集合* @return 返回編碼后的byte數(shù)組,將十進制字符串111001100101轉(zhuǎn)換為二進制byte數(shù)組*/public static byte[] zip(byte[] bytes, Map<Byte, String> entry) {StringBuilder stringBuilder = new StringBuilder();for (byte ch : bytes) {stringBuilder.append(entry.get(ch));}//System.out.println("stringBuilder=" + stringBuilder.toString());int length = stringBuilder.length();int len = (length + 7) / 8;byte[] huffmanCodeBytes = new byte[len];int index = 0;for (int k = 0; k < length; k += 8) {String str;if ((k + 8) > length) {str = stringBuilder.substring(k);} else {str = stringBuilder.substring(k, k + 8);}huffmanCodeBytes[index] = (byte) Integer.parseInt(str, 2);index++;}return huffmanCodeBytes;}/*** 計算赫夫曼壓縮率** @param original 未壓縮前的字節(jié)數(shù)組* @param newByte 壓縮后的字節(jié)數(shù)組* @return 壓縮率*/public static String calHuffmanZipRatio(byte[] original, byte[] newByte) {double zipRatio = (double) (original.length - newByte.length) / original.length * 100;String s = String.format("%.2f", zipRatio);return s + "%";}//---------------------------------以上是壓縮相關函數(shù)--------------------------------------------------------////先序遍歷赫夫曼樹public static void preErgodic(EncryptNode cur) {if (cur == null) {return;}System.out.println(cur);preErgodic(cur.left);preErgodic(cur.right);}//打印list集合public static <T> void outputList(List<T> list) {for (T t : list) {System.out.println(t);}} }/*** 實現(xiàn)Comparable接口,便于使用Collections.sort排序*/ class EncryptNode implements Comparable<EncryptNode> {public int value; //出現(xiàn)的次數(shù)public Byte c; //字符public EncryptNode left;public EncryptNode right;public EncryptNode(int value) {this.value = value;}public EncryptNode(Byte c, int value) {this.value = value;this.c = c;}@Overridepublic int compareTo(EncryptNode o) {return this.value - o.value; //升序}@Overridepublic String toString() {return "EncryptNode{" +"value=" + value +", c=" + c +'}';} }
3.赫夫曼數(shù)據(jù)解壓
1.將字節(jié)轉(zhuǎn)換位二進制字符串,這涉及到高位補齊,地位截取的情況。
2.將赫夫曼編碼表中的key和value進行置換,因為原赫夫曼編碼表的key是字符,value是二進制字節(jié);而我們解壓時要掃描一大串二進制,需要根據(jù)部分二進制找到赫夫曼編碼表中的字符,所以需要進行置換。編碼我們根據(jù)二進制獲取字符
4.全代碼
package com.zsh.algorithm.tree;import java.io.*; import java.util.*;/*** @author:Ronin* @since:2021/9/29* @email:1817937322@qq.com*/public class HuffmanEncrypt {public static void main(String[] args) {}//用來存儲轉(zhuǎn)換二進制字符串集合private static StringBuilder stringBuilder = new StringBuilder();//哈夫曼編碼表private static Map<Byte, String> huffmanCodesMap = new HashMap<>();//---------------------------------------以下是壓縮相關函數(shù)------------------------------------------///*** 赫夫曼編碼封裝方法** @param bytes* @return*/public static byte[] huffmanZip(byte[] bytes) {//1.計算字符串中每個字符出現(xiàn)的次數(shù),并字符、次數(shù)封裝到EncryptNode結點中,將其加入到list集合中List<EncryptNode> encryptNodeList = calByteNums(bytes);//outputList(encryptNodeList);//2.構建哈夫曼樹EncryptNode root = buildHuffmanTree(encryptNodeList);// preErgodic(root);//3.生成赫夫曼編碼表Map<Byte, String> huffmanCodes = getHuffmanCodes(root);//System.out.println(huffmanCodes);//4.轉(zhuǎn)換為赫夫曼編碼字節(jié)表return zip(bytes, huffmanCodes);}/*** 計算字符串中的每個字符的長度** @param bytes 待計算的字符串* @return 返回list集合*/public static List<EncryptNode> calByteNums(byte[] bytes) {Map<Byte, Integer> map = new HashMap<>();byte[] charArray = bytes;for (byte c : charArray) {if (map.containsKey(c)) {int num = map.get(c);map.replace(c, num, num + 1);} else {map.put(c, 1);}}List<EncryptNode> list = new ArrayList<>();for (Map.Entry<Byte, Integer> entry : map.entrySet()) {Byte key = entry.getKey();Integer value = entry.getValue();list.add(new EncryptNode(key, value));}return list;}/*** 構建赫夫曼樹** @param list 權重集合* @return 返回root節(jié)點*/public static EncryptNode buildHuffmanTree(List<EncryptNode> list) {while (list.size() > 1) {Collections.sort(list);EncryptNode leftNode = list.remove(0);EncryptNode rightNode = list.remove(0);EncryptNode parentNode = new EncryptNode(null, leftNode.value + rightNode.value);parentNode.left = leftNode;parentNode.right = rightNode;list.add(parentNode);}return list.remove(0);}/*** 重載getHuffmanCodes(EncryptNode node, String path, StringBuilder stringBuilder)** @param root 根節(jié)點* @return 哈夫曼編碼map集合*/public static Map<Byte, String> getHuffmanCodes(EncryptNode root) {if (root == null) {return null;} else {getHuffmanCodes(root.left, "0", stringBuilder);getHuffmanCodes(root.right, "1", stringBuilder);}return huffmanCodesMap;}/*** 赫夫曼編碼** @param node 結點* @param path 1代表右結點,0代表左結點路徑* @param stringBuilder 用于path拼接*/public static void getHuffmanCodes(EncryptNode node, String path, StringBuilder stringBuilder) {StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);stringBuilder2.append(path);if (node != null) {if (node.c == null) {getHuffmanCodes(node.left, "0", stringBuilder2);getHuffmanCodes(node.right, "1", stringBuilder2);} else {huffmanCodesMap.put(node.c, stringBuilder2.toString());}}}/*** @param bytes 待編碼字節(jié)數(shù)組* @param entry 赫夫曼編碼集合* @return 返回編碼后的byte數(shù)組*/public static byte[] zip(byte[] bytes, Map<Byte, String> entry) {StringBuilder stringBuilder = new StringBuilder();for (byte ch : bytes) {stringBuilder.append(entry.get(ch));}//System.out.println("stringBuilder=" + stringBuilder.toString());int length = stringBuilder.length();int len = (length + 7) / 8;byte[] huffmanCodeBytes = new byte[len];int index = 0;for (int k = 0; k < length; k += 8) {String str;if ((k + 8) > length) {str = stringBuilder.substring(k);} else {str = stringBuilder.substring(k, k + 8);}huffmanCodeBytes[index] = (byte) Integer.parseInt(str, 2);index++;}return huffmanCodeBytes;}/*** 計算赫夫曼壓縮率** @param original 未壓縮前的字節(jié)數(shù)組* @param newByte 壓縮后的字節(jié)數(shù)組* @return 壓縮率*/public static String calHuffmanZipRatio(byte[] original, byte[] newByte) {double zipRatio = (double) (original.length - newByte.length) / original.length * 100;String s = String.format("%.2f", zipRatio);return s + "%";}//---------------------------------以上是壓縮相關函數(shù)--------------------------------------------------------////---------------------------------以下是解壓縮相關函數(shù)-----------------------------------------------------//private static byte[] decode(Map<Byte, String> huffmanCodesMap, byte[] bytes) {StringBuilder stringBuilder = new StringBuilder();for (int k = 0; k < bytes.length; k++) {boolean flag = (k == bytes.length - 1);stringBuilder.append(byteToBinaryString(!flag, bytes[k]));}Map<String, Byte> reverseMap = new HashMap<>();for (Map.Entry<Byte, String> map : huffmanCodesMap.entrySet()) {reverseMap.put(map.getValue(), map.getKey());}List<Byte> list = new ArrayList<>();for (int i = 0; i < stringBuilder.length(); ) {int count = 1;Byte b = null;boolean flag = true;while (flag) {String key = stringBuilder.substring(i, i + count);b = reverseMap.get(key);if (b == null) {count++;} else {flag = false;}}list.add(b);i += count;}byte[] result = new byte[list.size()];for (int k = 0; k < result.length; k++) {result[k] = list.get(k);}return result;}/*** @param flag 如果為true, 表示需要進行高位補齊;如果是false,表示不需要進行高位補齊-->因為最后一位不一定剛好是滿足八位* @param be 這是將轉(zhuǎn)換為二進制字符串的字節(jié)* @return 返回二進制字符串*/private static String byteToBinaryString(boolean flag, byte be) {int beInt = be;if (flag) {beInt |= 256; //按位與 , 解決正數(shù)的高位補齊的問題}String s = Integer.toBinaryString(beInt); //補碼,正數(shù)需要高位補齊,負數(shù)需要截取if (flag) {return s.substring(s.length() - 8);} else {return s;}}//---------------------------------以上是解壓縮相關函數(shù)-----------------------------------------------------////---------------------------------以下是文件壓縮解壓相關函數(shù)-----------------------------------------------------///*** 文件壓縮方法 --- 赫夫曼編碼壓縮* @param readFile 壓縮文件路徑 文件 + 文件名* @param storeFile 壓縮之后存儲的路徑 文件 + 文件名*/private static void fileEncode(String readFile, String storeFile) {FileInputStream is = null;ObjectOutputStream oos = null;FileOutputStream os = null;try {is = new FileInputStream(readFile);byte[] bytes = new byte[is.available()];is.read(bytes);byte[] huffmanBytes = huffmanZip(bytes);os = new FileOutputStream(storeFile);oos = new ObjectOutputStream(os);oos.writeObject(huffmanBytes);oos.writeObject(huffmanCodesMap);os.write(huffmanBytes);} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {try {oos.close();os.close();is.close();} catch (IOException e) {e.printStackTrace();}}}public static void fileDecode(String zipFile, String dstFile) {System.out.println("文件解壓開始----------------------");FileInputStream is = null;ObjectInputStream ois = null;FileOutputStream os = null;try {is = new FileInputStream(zipFile);ois = new ObjectInputStream(is);byte[] huffmanBytes = (byte[])ois.readObject();Map<Byte, String> huffmanCodes = (Map<Byte, String>) ois.readObject();byte[] bytes = decode(huffmanCodes, huffmanBytes);os = new FileOutputStream(dstFile);os.write(bytes);} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} catch (ClassNotFoundException e) {e.printStackTrace();} finally {try {os.close();ois.close();is.close();} catch (IOException e) {e.printStackTrace();}}}//---------------------------------以上是文件壓縮解壓相關函數(shù)-----------------------------------------------------////先序遍歷赫夫曼樹public static void preErgodic(EncryptNode cur) {if (cur == null) {return;}System.out.println(cur);preErgodic(cur.left);preErgodic(cur.right);}//打印list集合public static <T> void outputList(List<T> list) {for (T t : list) {System.out.println(t);}} }/*** 實現(xiàn)Comparable接口,便于使用Collections.sort排序*/ class EncryptNode implements Comparable<EncryptNode> {public int value; //出現(xiàn)的次數(shù)public Byte c; //字符public EncryptNode left;public EncryptNode right;public EncryptNode(int value) {this.value = value;}public EncryptNode(Byte c, int value) {this.value = value;this.c = c;}@Overridepublic int compareTo(EncryptNode o) {return this.value - o.value; //升序}@Overridepublic String toString() {return "EncryptNode{" +"value=" + value +", c=" + c +'}';} }總結
以上是生活随笔為你收集整理的算法系列之赫夫曼编码实战一【数据压缩、数据解压】的全部內(nèi)容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 算法系列之赫夫曼树的精解【构造流程及原理
- 下一篇: 算法系列之使用赫夫曼编码的实战应用【对文