学习HTML-Beautify.js
生活随笔
收集整理的這篇文章主要介紹了
学习HTML-Beautify.js
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
文章截圖 - 更好的排版
源代碼下載
http://jsbeautifier.org/ 這個網站提供了在線的格式化JavaScript的功能,為了學習相關的知識, 我們先從簡單的入手。
在下載包中有一個HTML-Beautify.js文件,用來對HTML進行格式化。
其中的處理考慮了很多情況,如果頁面中存在<script>標簽,它會自動調用beautify.js來格式化JavaScript,這個文件有15k,428行代碼。
/*
 * 為了達到學習的目的,我對這個JS文件進行了精簡,只保留最基本的功能,并且只能處理比較簡單的情況,如下一段html代碼:
 *
 * <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml" ><head><title>Untitled Page</title></head><body><h1>Page Header</h1><ul><li>list item 1</li><li>list item 2</li><li>list item 3</li><li>list item 4</li></ul></body></html>
 *
 * 我們來看下源代碼(添加了注釋):
 */

/**
 * Re-indents an HTML string so that every tag and every text run sits on its
 * own line, indented according to its nesting depth.
 *
 * This is a deliberately small tokenizer: the input is split at '<' and '>'
 * only, so constructs such as comments or script bodies get no special
 * treatment.
 *
 * @param {string} html_source - HTML text to format; a falsy value is treated as ''.
 * @param {number} [indent_size] - Indent characters per nesting level; a falsy value falls back to 2.
 * @param {string} [indent_character] - Character repeated to build one indent unit; a falsy value falls back to ' '.
 * @returns {string} The formatted HTML.
 */
function style_html(html_source, indent_size, indent_character) {
    // Parser acts as a class; multi_parser is its single instance.
    var multi_parser;

    function Parser() {
        // Index of the next character to read from this.input.
        this.pos = 0;
        // Kind of token expected next; HTML only alternates between 'CONTENT' and 'TAG'.
        this.current_mode = 'CONTENT';
        // Bookkeeping for open tags, used to restore the indent level when a
        // closing tag is met. For a tag "div" it holds:
        //   divcount   - how many div instances have been recorded
        //   div1       - indent level at which the first div was opened
        //   div1parent - key of the tag that was current when div1 was opened
        // and `parent` is the key of the innermost open tag.
        this.tags = {
            parent: 'parent1',
            parentcount: 1,
            parent1: ''
        };
        // Text and type of the token currently being printed.
        this.token_text = '';
        this.token_type = '';

        this.Utils = {
            // Whitespace characters; runs of them collapse to a single space.
            whitespace: "\n\r\t ".split(''),
            // Tags that never have a closing counterpart.
            single_token: 'br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed'.split(','),
            // True when `what` is strictly equal to one of the elements of `arr`.
            in_array: function (what, arr) {
                return arr.indexOf(what) !== -1;
            }
        };

        /**
         * Reads text up to (not including) the next '<'.
         * Leading/trailing whitespace is dropped and inner runs collapse to one space.
         * @returns {Array} [text, 'TK_CONTENT'], or ['', 'TK_EOF'] when the input is
         *     exhausted and no text is pending.
         */
        this.get_content = function () {
            var input_char = '';
            var content = [];
            var space = false;

            while (this.input.charAt(this.pos) !== '<') {
                if (this.pos >= this.input.length) {
                    // Pending text at end of input is still a content token. Returning
                    // it as a bare string made the caller index into its characters
                    // and lose the text.
                    return content.length ? [content.join(''), 'TK_CONTENT'] : ['', 'TK_EOF'];
                }

                input_char = this.input.charAt(this.pos);
                this.pos++;

                if (this.Utils.in_array(input_char, this.Utils.whitespace)) {
                    // Only remember whitespace once some text has been collected.
                    if (content.length) {
                        space = true;
                    }
                    continue;
                }
                if (space) {
                    content.push(' ');
                    space = false;
                }
                content.push(input_char);
            }
            return [content.join(''), 'TK_CONTENT'];
        };

        /**
         * Records an opening tag: stores the current indent level for it,
         * remembers the previous current tag as its parent, and makes it current.
         * @param {string} tag - Lower-cased tag name.
         */
        this.record_tag = function (tag) {
            if (this.tags[tag + 'count']) {
                this.tags[tag + 'count']++;
            } else {
                this.tags[tag + 'count'] = 1;
            }
            var key = tag + this.tags[tag + 'count']; // e.g. 'div1'
            this.tags[key] = this.indent_level;
            this.tags[key + 'parent'] = this.tags.parent;
            this.tags.parent = key;
        };

        /**
         * Handles a closing tag: if the most recently recorded instance of `tag`
         * is an ancestor of (or is) the current tag, restores its indent level and
         * makes its parent current. The instance is forgotten either way.
         * Closing tags whose name was never recorded are ignored.
         * @param {string} tag - Lower-cased tag name without the leading '/'.
         */
        this.retrieve_tag = function (tag) {
            if (!this.tags[tag + 'count']) {
                return;
            }
            var key = tag + this.tags[tag + 'count'];

            // Climb from the current tag towards the root ('' ends the walk).
            var temp_parent = this.tags.parent;
            while (temp_parent) {
                if (key === temp_parent) {
                    break;
                }
                temp_parent = this.tags[temp_parent + 'parent'];
            }
            if (temp_parent) {
                this.indent_level = this.tags[key];
                this.tags.parent = this.tags[temp_parent + 'parent'];
            }

            delete this.tags[key + 'parent'];
            delete this.tags[key];
            if (this.tags[tag + 'count'] === 1) {
                delete this.tags[tag + 'count'];
            } else {
                this.tags[tag + 'count']--;
            }
        };

        /**
         * Reads one tag, from the current position through the next '>'.
         * Whitespace runs collapse to one space, with no space kept around '='
         * or before '>'. Opening/closing tags update this.tags as a side effect.
         * @returns {Array} [text, 'TK_TAG_START' | 'TK_TAG_END' | 'TK_TAG_SINGLE'];
         *     [text, 'TK_CONTENT'] for an unterminated tag at end of input;
         *     ['', 'TK_EOF'] when nothing is left.
         */
        this.get_tag = function () {
            var input_char = '';
            var content = [];
            var space = false;

            do {
                if (this.pos >= this.input.length) {
                    // An unterminated tag is emitted as plain text instead of being
                    // returned as a bare string, which the caller would drop.
                    return content.length ? [content.join(''), 'TK_CONTENT'] : ['', 'TK_EOF'];
                }

                input_char = this.input.charAt(this.pos);
                this.pos++;

                if (this.Utils.in_array(input_char, this.Utils.whitespace)) {
                    space = true;
                    continue;
                }

                if (space && content.length && content[content.length - 1] !== '=' &&
                        input_char !== '=' && input_char !== '>') {
                    content.push(' ');
                }
                // Always clear the flag once a real character is consumed; leaving it
                // set after '=' used to inject a space inside the following value
                // (a= "b" became a=" b").
                space = false;
                content.push(input_char);
            } while (input_char !== '>');

            var tag_complete = content.join('');
            // The tag name ends at the first space, or at '>' when there are no attributes.
            var tag_index = tag_complete.indexOf(' ');
            if (tag_index === -1) {
                tag_index = tag_complete.indexOf('>');
            }
            var tag_check = tag_complete.substring(1, tag_index).toLowerCase();
            var tag_type = '';

            if (tag_complete.charAt(tag_complete.length - 2) === '/' ||
                    this.Utils.in_array(tag_check, this.Utils.single_token)) {
                tag_type = 'SINGLE';
            } else if (tag_check.charAt(0) === '/') {
                // Closing tag: restore the indent level and forget the tag.
                this.retrieve_tag(tag_check.substring(1));
                tag_type = 'END';
            } else {
                // Opening tag: remember it in this.tags.
                this.record_tag(tag_check);
                tag_type = 'START';
            }
            return [tag_complete, 'TK_TAG_' + tag_type];
        };

        /**
         * Returns the next token according to this.current_mode.
         * @returns {Array} A [text, type] pair from get_content or get_tag.
         */
        this.get_token = function () {
            if (this.current_mode === 'CONTENT') {
                return this.get_content();
            }
            if (this.current_mode === 'TAG') {
                return this.get_tag();
            }
        };

        /**
         * Initializes the input, the output buffer and the indent settings, and
         * installs the output helpers on this instance.
         * @param {string} js_source - Text to tokenize; falsy becomes ''.
         * @param {string} indent_character - Indent character; falsy becomes ' '.
         * @param {number} indent_size - Characters per indent level; falsy becomes 2.
         */
        this.printer = function (js_source, indent_character, indent_size) {
            this.input = js_source || '';
            this.output = [];
            this.indent_character = indent_character || ' ';
            this.indent_string = '';
            this.indent_size = indent_size || 2;
            this.indent_level = 0;

            for (var i = 0; i < this.indent_size; i++) {
                this.indent_string += this.indent_character;
            }

            // Appends '\n' plus the current indentation to `arr`. Does nothing while
            // `arr` is empty, so the output never starts with a blank line. Unless
            // `ignore` is true, trailing single-whitespace entries are removed first.
            this.print_newline = function (ignore, arr) {
                if (!arr || !arr.length) {
                    return;
                }
                if (!ignore) {
                    while (this.Utils.in_array(arr[arr.length - 1], this.Utils.whitespace)) {
                        arr.pop();
                    }
                }
                arr.push('\n');
                for (var i = 0; i < this.indent_level; i++) {
                    arr.push(this.indent_string);
                }
            };

            this.print_token = function (text) {
                this.output.push(text);
            };

            this.indent = function () {
                this.indent_level++;
            };

            this.unindent = function () {
                if (this.indent_level > 0) {
                    this.indent_level--;
                }
            };
        };
    }

    /*_____________________--------------------_____________________*/

    // Create the Parser instance; inside the constructor `this` is multi_parser.
    multi_parser = new Parser();
    // Initialize the starting values; `this` inside printer is multi_parser as well.
    multi_parser.printer(html_source, indent_character, indent_size);

    // Pull tokens one by one until the end of the input.
    while (true) {
        var t = multi_parser.get_token();
        multi_parser.token_text = t[0];
        multi_parser.token_type = t[1];

        if (multi_parser.token_type === 'TK_EOF') {
            break;
        }

        switch (multi_parser.token_type) {
        case 'TK_TAG_START':
            // New line, print the tag, indent what follows, then expect content.
            multi_parser.print_newline(false, multi_parser.output);
            multi_parser.print_token(multi_parser.token_text);
            multi_parser.indent();
            multi_parser.current_mode = 'CONTENT';
            break;
        case 'TK_TAG_END':
            // New line (indent level was already restored by retrieve_tag), print the tag.
            multi_parser.print_newline(true, multi_parser.output);
            multi_parser.print_token(multi_parser.token_text);
            multi_parser.current_mode = 'CONTENT';
            break;
        case 'TK_TAG_SINGLE':
            // Stand-alone tag such as <br />: new line, print, indent unchanged.
            multi_parser.print_newline(false, multi_parser.output);
            multi_parser.print_token(multi_parser.token_text);
            multi_parser.current_mode = 'CONTENT';
            break;
        case 'TK_CONTENT':
            // Text: printed on its own line unless empty; a tag comes next.
            if (multi_parser.token_text !== '') {
                multi_parser.print_newline(false, multi_parser.output);
                multi_parser.print_token(multi_parser.token_text);
            }
            multi_parser.current_mode = 'TAG';
            break;
        }
    }

    // Final result.
    return multi_parser.output.join('');
}

// Page wiring: skipped when jQuery's `$` is not loaded, so that the formatter
// itself can be used on its own.
if (typeof $ === 'function') {
    $(function () {
        $("#format").click(function () {
            // Format the textarea with id="content", indenting by four spaces.
            $("#content").val(style_html($("#content").val(), 4, " "));
        });
    });
}

/*
 * 其中對標簽的處理有點拗口,我們特別拿出來講解。
 */
比如有如下的結構:
<body><div><span><span></span></span></div></body>
獲取body標簽時,this.tags的內容為:
{"bodycount":1,"body1":1,"body1parent":"html1"}
獲取div時:
{"bodycount":1,"body1":1,"body1parent":"html1","divcount":1,"div1":2,"div1parent":"body1"}
獲取第一個span時:
{"bodycount":1,"body1":1,"body1parent":"html1","divcount":1,"div1":2,"div1parent":"body1","spancount":1,"span1":3,"span1parent":"div1"}
獲取第二個span時:
{"bodycount":1,"body1":1,"body1parent":"html1","divcount":1,"div1":2,"div1parent":"body1","spancount":2,"span1":3,"span1parent":"div1","span2":4,"span2parent":"span1"}
獲取第一個/span時:
{"bodycount":1,"body1":1,"body1parent":"html1","divcount":1,"div1":2,"div1parent":"body1","spancount":1,"span1":3,"span1parent":"div1"}
此時this.indent_level = 4; 也就是說里面span標簽的縮進為4個單位
獲取第二個/span時:
{"bodycount":1,"body1":1,"body1parent":"html1","divcount":1,"div1":2,"div1parent":"body1"}
此時this.indent_level = 3; 也就是說外面span標簽的縮進為3個單位
以此類推….
轉載于:https://www.cnblogs.com/sanshi/archive/2009/07/23/1529395.html
總結
以上是生活随笔為你收集整理的学习HTML-Beautify.js的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: [转]实用至上!12个Vista服务可关
- 下一篇: 使用Windows Live Write