ASP.NET 去掉 HTML 标记的代码
/// <summary>
/// Removes HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with script blocks, tags and comments removed, the handled entities
/// decoded, any remaining angle brackets and CRLF pairs dropped, and the result
/// HTML-encoded and trimmed. An empty string is returned for null or empty input.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets '.' cross line breaks, so scripts
    // that span several lines are removed together with their content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove tags, line breaks followed by whitespace, and comment remnants.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the handled character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    // A non-breaking space becomes a plain space (the pasted literal was mis-encoded).
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

    // Drop every other numeric entity. "&#38;" is skipped here because it is
    // decoded by the ampersand rule below.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(?!38;)(\d+);", "", RegexOptions.IgnoreCase);

    // The ampersand is decoded last so that "&amp;lt;" yields the text "&lt;"
    // instead of being decoded a second time into "<".
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

    // String.Replace returns a new string; the result has to be assigned.
    // Note that brackets produced by decoding &lt; / &gt; are dropped here as well.
    Htmlstring = Htmlstring.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // WebUtility.HtmlEncode does not need an active HttpContext, so the method
    // also works outside of a web request.
    return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
}
/// <summary>
/// Removes HTML markup.
/// </summary>
/// <param name="strHtml">Source text that contains HTML.</param>
/// <returns>The text with the markup removed.</returns>
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Console demo that strips HTML markup from a string by applying a table of
/// regular-expression rules in order.
/// </summary>
public class StripHTMLTest
{
    /// <summary>
    /// Strips a sample HTML document and writes the result to the console.
    /// </summary>
    public static void Main()
    {
        string s = StripHTML("<HTML><HEAD><TITLE>中國石龍信息平臺</TITLE></HEAD><BODY>faddfs龍信息平臺</BODY></HTML>");
        Console.WriteLine(s);
    }

    /// <summary>
    /// Removes HTML markup from a string and decodes the handled entities.
    /// </summary>
    /// <param name="strHtml">Source text that may contain HTML markup.</param>
    /// <returns>
    /// The text after all rules were applied and any remaining angle brackets and
    /// CRLF pairs were dropped. An empty string is returned for null or empty input.
    /// </returns>
    public static string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        // Patterns are applied in this order; aryRep holds the replacement for
        // the pattern at the same index.
        string[] aryReg =
        {
            // (?s) lets '.' cross line breaks so multi-line scripts are removed.
            @"(?s)<script[^>]*?>.*?</script>",
            @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
            @"([\r\n])[\s]+",
            @"&(quot|#34);",
            @"&(lt|#60);",
            @"&(gt|#62);",
            @"&(nbsp|#160);",
            @"&(iexcl|#161);",
            @"&(cent|#162);",
            @"&(pound|#163);",
            @"&(copy|#169);",
            // Every other numeric entity is dropped; "&#38;" is left for the next rule.
            @"&#(?!38;)(\d+);",
            // Decoded after the other entities so "&amp;lt;" is not decoded twice.
            @"&(amp|#38);",
            @"-->",
            @"<!--.*\n"
        };

        string[] aryRep =
        {
            "",
            "",
            "",
            "\"",
            "<",
            ">",
            " ",     // plain space for a non-breaking space (pasted literal was mis-encoded)
            "\xa1",  // chr(161)
            "\xa2",  // chr(162)
            "\xa3",  // chr(163)
            "\xa9",  // chr(169)
            "",
            "&",
            "\r\n",
            ""
        };

        string strOutput = strHtml;
        for (int i = 0; i < aryReg.Length; i++)
        {
            strOutput = Regex.Replace(strOutput, aryReg[i], aryRep[i], RegexOptions.IgnoreCase);
        }

        // String.Replace returns a new string; the result has to be assigned.
        // Note that brackets produced by decoding &lt; / &gt; are dropped here as well.
        strOutput = strOutput.Replace("<", "").Replace(">", "").Replace("\r\n", "");
        return strOutput;
    }
}
#region Remove HTML tags
/// <summary>
/// Removes every <c>&lt;...&gt;</c> tag from the given text.
/// </summary>
/// <param name="HTMLStr">Text that may contain HTML tags.</param>
/// <returns>
/// The text without tags, or an empty string when the input is null or empty.
/// </returns>
public static string ParseTags(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", "");
}

#endregion
????
#region Extract the image URL from text
/// <summary>
/// Returns the <c>src</c> value of the first <c>&lt;img&gt;</c> tag found in the text.
/// </summary>
/// <param name="HTMLStr">Text that may contain HTML.</param>
/// <returns>
/// The URL exactly as written in the markup, without surrounding quotes, or an
/// empty string when the input is null or empty or no image tag with a
/// <c>src</c> attribute is found.
/// </returns>
public static string GetImgUrl(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    // "src" must follow whitespace, so attributes such as data-src are skipped.
    // The value may be double-quoted, single-quoted or unquoted; the quotes are
    // not part of the captured URL.
    const string pattern =
        @"<img\s+(?:[^>]*?\s)?src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))";

    // Matching is case-insensitive instead of lower-casing the input, so the
    // letter case of the URL is preserved.
    Match m = Regex.Match(HTMLStr, pattern, RegexOptions.IgnoreCase);
    return m.Success ? m.Groups["url"].Value : string.Empty;
}

#endregion
轉載于:https://www.cnblogs.com/chennie/archive/2011/09/24/2189551.html
總結
以上是生活随笔為你收集整理的asp.net去掉HTML标记代码的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 微软2011 Build大会:Windo
- 下一篇: 查看 Oracle 是32位还是64位的