代码之家  ›  专栏  ›  技术社区  ›  Omu

自动删除缩进,HTML中不需要的空白(缩小)

  •  1
  • Omu  · 技术社区  · 14 年前

    我有一个ASP.NET-MVC应用程序,当我查看页面的源代码时,我发现HTML是缩进的,有很多空白空间,

    我想如果我删除所有这些空间,我的页面将变小(以KB为单位)

    有人知道如何自动删除它们吗?

    3 回复  |  直到 14 年前
        1
  •  2
  •   David    14 年前

    取自 http://madskristensen.net/post/A-whitespace-removal-HTTP-module-for-ASPNET-20.aspx .

    /// <summary>
    /// Action filter that routes the response body through a <see cref="WhiteSpaceStream"/>.
    /// </summary>
    [AttributeUsage(AttributeTargets.Class, Inherited = true, AllowMultiple = false)]
    internal class WhiteSpaceFilterAttribute : ActionFilterAttribute
    {
        /// <summary>
        /// Wraps the response's current output filter in a <see cref="WhiteSpaceStream"/>
        /// and installs the wrapper as the new filter.
        /// </summary>
        /// <param name="filterContext">Context of the action about to execute.</param>
        public override void OnActionExecuting(ActionExecutingContext filterContext)
        {
            var response = filterContext.HttpContext.Response;
            var downstream = response.Filter;

            response.Filter = new WhiteSpaceStream(downstream);
        }
    }
    
    /// <summary>
    /// Stream decorator that removes the whitespace matched by <c>m_regex</c> from every
    /// chunk written to it and forwards the result to the wrapped sink stream.
    /// </summary>
    /// <remarks>
    /// Each <see cref="Write"/> call is filtered independently, so a whitespace run (or a
    /// multi-byte character) that straddles two writes is not seen as a whole.
    /// NOTE(review): text is decoded/encoded with <see cref="Encoding.Default"/>; confirm
    /// this matches the encoding the response is actually produced in.
    /// </remarks>
    internal class WhiteSpaceStream : Stream
    {
        private readonly Stream m_sink;
        private static readonly Regex m_regex = new Regex(@"(?<=[^])\t{2,}|(?<=[>])\s{2,}(?=[<])|(?<=[>])\s{2,11}(?=[<])|(?=[\n])\s{2,}");
        // Alternative pattern, kept for reference:
        //private static Regex m_regex = new Regex(@"^\s+", RegexOptions.Multiline | RegexOptions.Compiled); 

        /// <summary>Creates a filter that writes its cleaned output to <paramref name="sink"/>.</summary>
        /// <param name="sink">The stream that receives the filtered bytes.</param>
        public WhiteSpaceStream(Stream sink)
        {
            m_sink = sink;
        }

        /// <summary>Always reports <c>true</c>.</summary>
        public override bool CanRead
        {
            get { return true; }
        }

        /// <summary>Always reports <c>true</c>.</summary>
        public override bool CanSeek
        {
            get { return true; }
        }

        /// <summary>Always reports <c>true</c>.</summary>
        public override bool CanWrite
        {
            get { return true; }
        }

        /// <summary>Flushes the wrapped sink.</summary>
        public override void Flush()
        {
            m_sink.Flush();
        }

        /// <summary>Always reports 0; the filter does not track how much was written.</summary>
        public override long Length
        {
            get { return 0; }
        }

        private long _position;

        /// <summary>Position value stored locally; it is not forwarded to the sink.</summary>
        public override long Position
        {
            get { return _position; }
            set { _position = value; }
        }

        /// <summary>Reads directly from the wrapped sink.</summary>
        public override int Read(byte[] buffer, int offset, int count)
        {
            return m_sink.Read(buffer, offset, count);
        }

        /// <summary>Seeks the wrapped sink.</summary>
        public override long Seek(long offset, SeekOrigin origin)
        {
            return m_sink.Seek(offset, origin);
        }

        /// <summary>Sets the length of the wrapped sink.</summary>
        public override void SetLength(long value)
        {
            m_sink.SetLength(value);
        }

        /// <summary>Closes the wrapped sink.</summary>
        public override void Close()
        {
            m_sink.Close();
        }

        /// <summary>
        /// Decodes the given slice of <paramref name="buffer"/>, strips the whitespace matched
        /// by <c>m_regex</c>, and writes the re-encoded text to the sink.
        /// </summary>
        /// <param name="buffer">Buffer containing the bytes to filter.</param>
        /// <param name="offset">Index of the first byte to filter.</param>
        /// <param name="count">Number of bytes to filter.</param>
        public override void Write(byte[] buffer, int offset, int count)
        {
            // Decode only the slice the caller asked us to write: bytes outside
            // [offset, offset + count) may be stale data from an earlier write.
            string text = Encoding.Default.GetString(buffer, offset, count);

            text = m_regex.Replace(text, string.Empty);

            byte[] outdata = Encoding.Default.GetBytes(text);
            m_sink.Write(outdata, 0, outdata.Length);
        }
    }
    
        2
  •  2
  •   Gup3rSuR4c    14 年前

    我想推荐以下代码。它工作得很好(我在几个网站上使用),比@david的版本简单:

    using System;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Web;
    using System.Web.Mvc;
    
    /// <summary>
    /// Action filter that installs a <see cref="WhitespaceFilter"/> as the response filter.
    /// </summary>
    public class WhitespaceStrip : ActionFilterAttribute {
        /// <summary>
        /// Replaces the response filter with a new <see cref="WhitespaceFilter"/> before the
        /// action runs. Any exception raised while doing so is swallowed.
        /// </summary>
        /// <param name="Context">Context of the action about to execute.</param>
        public override void OnActionExecuting(ActionExecutingContext Context) {
            try {
                var response = Context.HttpContext.Response;

                response.Filter = new WhitespaceFilter();
            } catch (Exception) {
                // Deliberately ignored: if the filter cannot be installed the
                // response is simply sent unfiltered.
            }
        }
    }
    
    /// <summary>
    /// Response filter that collapses whitespace and removes HTML comments from
    /// <c>text/html</c> responses, leaving <c>&lt;pre&gt;</c> blocks captured by the
    /// pattern untouched. Other content types are passed through unchanged.
    /// </summary>
    /// <remarks>
    /// Each <see cref="Write"/> call is filtered independently, so markup that straddles
    /// two writes (a comment, a <c>&lt;pre&gt;</c> block, a multi-byte character) is not
    /// seen as a whole.
    /// </remarks>
    public class WhitespaceFilter : MemoryStream {
        // Built once: constructing compiled regexes on every Write is needlessly expensive.
        private static readonly Regex WhitespaceRegex = new Regex("(<pre>[^<>]*(((?<Open><)[^<>]*)+((?<Close-Open>>)[^<>]*)+)*(?(Open)(?!))</pre>)|\\s\\s+|[\\t\\n\\r]", RegexOptions.Compiled | RegexOptions.Singleline);
        private static readonly Regex CommentsRegex = new Regex("<!--.*?-->", RegexOptions.Compiled | RegexOptions.Singleline);

        private HttpResponse Response = HttpContext.Current.Response;
        private Stream Filter = null;

        private string[] ContentTypes = new string[1] {
            "text/html"
        };

        /// <summary>
        /// Captures the current response's existing filter as the downstream target.
        /// </summary>
        public WhitespaceFilter() {
            this.Filter = this.Response.Filter;
        }

        /// <summary>
        /// Filters the given slice of <paramref name="Buffer"/> and forwards it to the
        /// downstream filter.
        /// </summary>
        /// <param name="Buffer">Buffer containing the bytes to write.</param>
        /// <param name="Offset">Index of the first byte to write.</param>
        /// <param name="Count">Number of bytes to write.</param>
        public override void Write(
            byte[] Buffer,
            int Offset,
            int Count) {
            if (!this.ContentTypes.Contains(this.Response.ContentType)) {
                // Not HTML: forward the raw bytes. Round-tripping them through a
                // UTF-8 decode/encode would corrupt binary or differently-encoded content.
                this.Filter.Write(Buffer, Offset, Count);
                return;
            }

            this.Response.ContentEncoding = Encoding.UTF8;

            // Decode only the requested slice; the rest of the buffer may hold stale bytes.
            string source = Encoding.UTF8.GetString(Buffer, Offset, Count);

            source = WhitespaceRegex.Replace(source, "$1");
            source = CommentsRegex.Replace(source, string.Empty);

            // The encoded array is new, so it is written from index 0 (not the caller's Offset).
            byte[] output = Encoding.UTF8.GetBytes(source);
            this.Filter.Write(output, 0, output.Length);
        }
    }
    

    更新

    @Omu,你说我的版本慢了 “6X”,这让我有点不服气,于是我着手验证你说得对不对。最后我重写了过滤器并做了一些清理,然后运行了一些测试:循环生成一个 10000 行的表格来制造大量空白,观察过滤器的表现。结果是,我看不出这两个正则表达式有什么差别。

    当然,如果你的意思是两个表达式的工作方式不同、所以我的会慢一些,这也许有一定道理;但要想看出差别,你得输出超过 1 MB 的 HTML 页面……我希望你不是在这么做。

    此外,我的表达保留了 <pre> 元素。。。

    所有这些都说,这是我的修订版:

    using System;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Web;
    using System.Web.Mvc;
    
    /// <summary>
    /// Action filter that installs a <see cref="WhitespaceStream"/> as the response filter
    /// once the action has executed.
    /// </summary>
    [AttributeUsage(AttributeTargets.Class, Inherited = true, AllowMultiple = false)]
    internal class WhitespaceStripAttribute : ActionFilterAttribute {
        /// <summary>
        /// Replaces the response filter with a <see cref="WhitespaceStream"/> built from the
        /// current HTTP context.
        /// </summary>
        /// <param name="ActionExecutedContext">Context of the action that just executed.</param>
        public override void OnActionExecuted(ActionExecutedContext ActionExecutedContext) {
            var httpContext = ActionExecutedContext.HttpContext;

            httpContext.Response.Filter = new WhitespaceStream(httpContext);
        }
    }
    
    /// <summary>
    /// Response filter that collapses whitespace and removes HTML comments from
    /// <c>text/html</c> responses, leaving <c>&lt;pre&gt;</c> blocks captured by the
    /// pattern untouched. Other content types are passed through unchanged.
    /// </summary>
    /// <remarks>
    /// Each <see cref="Write"/> call is filtered independently, so markup that straddles
    /// two writes (a comment, a <c>&lt;pre&gt;</c> block, a multi-byte character) is not
    /// seen as a whole.
    /// </remarks>
    internal class WhitespaceStream : MemoryStream {
        private readonly HttpContextBase HttpContext = null;
        private readonly Stream FilterStream = null;

        private readonly string[] ContentTypes = new string[1] {
            "text/html"
        };

        private static readonly Regex WhitespaceRegex = new Regex("(<pre>[^<>]*(((?<Open><)[^<>]*)+((?<Close-Open>>)[^<>]*)+)*(?(Open)(?!))</pre>)|\\s\\s+|[\\t\\n\\r]", RegexOptions.Singleline | RegexOptions.Compiled);
        private static readonly Regex CommentsRegex = new Regex("<!--.*?-->", RegexOptions.Singleline | RegexOptions.Compiled);

        /// <summary>
        /// Captures the context and its response's existing filter as the downstream target.
        /// </summary>
        /// <param name="HttpContext">HTTP context whose response is being filtered.</param>
        public WhitespaceStream(
            HttpContextBase HttpContext) {
            this.HttpContext = HttpContext;
            this.FilterStream = HttpContext.Response.Filter;
        }

        /// <summary>
        /// Filters the given slice of <paramref name="Buffer"/> and forwards it to the
        /// downstream filter.
        /// </summary>
        /// <param name="Buffer">Buffer containing the bytes to write.</param>
        /// <param name="Offset">Index of the first byte to write.</param>
        /// <param name="Count">Number of bytes to write.</param>
        public override void Write(
            byte[] Buffer,
            int Offset,
            int Count) {
            bool isHtml = this.ContentTypes.Any(
                ct =>
                    (ct == this.HttpContext.Response.ContentType));

            if (!isHtml) {
                // Not HTML: forward the raw bytes. Round-tripping them through a
                // UTF-8 decode/encode would corrupt binary or differently-encoded content.
                this.FilterStream.Write(Buffer, Offset, Count);
                return;
            }

            this.HttpContext.Response.ContentEncoding = Encoding.UTF8;

            // Decode only the requested slice; the rest of the buffer may hold stale bytes.
            string Source = Encoding.UTF8.GetString(Buffer, Offset, Count);

            Source = WhitespaceRegex.Replace(Source, "$1");
            Source = CommentsRegex.Replace(Source, string.Empty);

            // The encoded array is new, so it is written from index 0 (not the caller's Offset).
            byte[] Output = Encoding.UTF8.GetBytes(Source);
            this.FilterStream.Write(Output, 0, Output.Length);
        }
    }
    
        3
  •  1
  •   Piskvor left the building Rohit Kumar    14 年前

    由于在 HTML 中多个连续的空白被视为一个空格,您可以对响应内容应用如下正则表达式替换:

    /\s+/ /g
    

    它将任何连续空格转换为单个空格。

    请注意,虽然这会减小未压缩页面的大小,但如果您要gzip这些页面,节省的空间不会太大。

    警告:这可能会破坏内联 JavaScript,因为 JS 会把换行当作语句分隔符(相当于 ; )。如果您的 JS 使用 ; 来分隔语句(大多数 JS 都这样做),就不会有问题。

    另外, <pre> 块中的代码示例(code samples in <pre> blocks)也会受到影响,因为其中的空白会原样显示:

    some   code   here {
      more          code }
    

    变成

    some code here { more code }