static void

Convert HTML to XHTML

Test the regex-based version, showing some of its features (and oddities).

using NUnit.Framework;
using System;

namespace XHtmlConvertor
{
    /// <summary>
    /// Tests for the regex-based HTML to XHTML conversion (ConvertWithRegex).
    /// </summary>
    /// <remarks>
    /// Most of these tests fail against the Agility-based converter because of slight
    /// differences in output. Some differences are minor formatting, others are corner
    /// cases (both converters occasionally change content INSIDE server tags).
    /// </remarks>
    [TestFixture]
    public class TestConvertWithRegex
    {
        /// <summary>
        /// Loads <paramref name="html"/> into a new converter, saves the result to a
        /// string and asserts that it equals <paramref name="expected"/>.
        /// </summary>
        /// <param name="html">The HTML fragment handed to <c>LoadHtml</c>.</param>
        /// <param name="expected">The exact text <c>SaveToString</c> must return.</param>
        private static void AssertConverts(string html, string expected)
        {
            ConvertWithRegex converter = new ConvertWithRegex();
            converter.LoadHtml(html);

            string actual = converter.SaveToString();

            Assert.AreEqual(expected, actual);
        }

        /// <summary>
        /// Element names are lowercased and an unclosed BR becomes self-closing.
        /// </summary>
        /// <remarks>Fails with Agility, which closes tags with a space before the slash.</remarks>
        [Test]
        public void TestBr()
        {
            string input = @"<P><BR><br/></P>";
            string expected = @"<p><br/><br/></p>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// A start tag that spans several lines.
        /// </summary>
        /// <remarks>
        /// The regex version preserves the spaces and line breaks; Agility removes them
        /// and closes the div.
        /// </remarks>
        [Test]
        public void TestMultiLine()
        {
            // The continuation lines are part of the verbatim strings: their leading
            // whitespace is significant and must stay identical in both literals.
            string input = @"<DIV
                    ID=""MYID""        CLASS = ""UGLY"">";
            string expected = @"<div
                    id=""MYID""        class = ""UGLY"">";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// Different attribute forms on one element: single-quoted, double-quoted and
        /// valueless (READONLY is expected to become readonly="readonly").
        /// </summary>
        /// <remarks>Agility turns single quotes into double quotes.</remarks>
        [Test]
        public void TestAttributes()
        {
            string input = @"<INPUT VALUE='1' ID=""A"" READONLY TYPE=""text"">";
            string expected = @"<input value='1' id=""A"" readonly=""readonly"" type=""text""/>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// A doctype is inserted in front of a full document; the html element gains an
        /// xmlns attribute and the unquoted bgColor value is quoted.
        /// </summary>
        [Test]
        public void TestDocType()
        {
            string input = @"<HTML><HEAD></HEAD><BODY bgColor=#eeeeee><H1>HELLO</H1></BODY></HTML>";
            string expected = @"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 Transitional//EN"" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"">" +
                "\r\n" + @"<html xmlns=""http://www.w3.org/1999/xhtml""><head></head><body bgcolor=""#eeeeee""><h1>HELLO</h1></body></html>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// The LANGUAGE attribute is removed from script elements.
        /// </summary>
        [Test]
        public void TestScriptLanguage()
        {
            // NOTE(review): inside a verbatim (@"...") string, \n is a literal backslash
            // followed by 'n', not a line break. Input and expected agree, so the test is
            // self-consistent, but confirm that real newlines were not intended.
            string input = @"<SCRIPT LANGUAGE=""JAVASCRIPT""><!--\nalert(""Here"");\n//--></SCRIPT>";
            string expected = @"<script><!--\nalert(""Here"");\n//--></script>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// Plain HTML tags are changed, asp controls are left exactly as written.
        /// </summary>
        /// <remarks>Agility alters the case of asp controls and adds a closing element.</remarks>
        [Test]
        public void TestAspControl()
        {
            string input = @"<P><asp:TextBox Runat=""Server"" Value=""Test""/></P>";
            string expected = @"<p><asp:TextBox Runat=""Server"" Value=""Test""/></p>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// An input element with server expressions both inside an attribute value and
        /// in attribute position.
        /// </summary>
        /// <remarks>Agility lowercases MORESERVER STUFF.</remarks>
        [Test]
        public void TestInputWithServer()
        {
            string input = @"<INPUT VALUE=""<%=UPPERCASE RUNNING=SERVER%>"" <%=MORESERVER STUFF%>>";
            string expected = @"<input value=""<%=UPPERCASE RUNNING=SERVER%>"" <%=MORESERVER STUFF%>/>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// Comparison operators (&lt; and &gt;) inside a server block that look like a
        /// tag must be left untouched.
        /// </summary>
        [Test]
        public void TestTagLikeInServer()
        {
            string input = @"<% if(A<B && B>A)Response.Write('OK')%>";
            string expected = @"<% if(A<B && B>A)Response.Write('OK')%>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// A complete tag written out from within a server block (a corner case: should
        /// this be allowed at all?).
        /// </summary>
        /// <remarks>Agility doesn't lowercase the img, but regex does.</remarks>
        [Test]
        public void TestTagInServer()
        {
            string input = @"<P><% if(HasImage) Response.Write(""<IMG SRC='MyImg.gif'>"")%></P>";
            string expected = @"<p><% if(HasImage) Response.Write(""<img src='MyImg.gif'/>"")%></p>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// Content inside a server tag is (generally) not touched.
        /// </summary>
        [Test]
        public void TestDontTouchServer()
        {
            // Original author's note: the variant below, in which the server block
            // contains a complete tag, WILL be converted:
            //   <% strHTML = strHTML & "<OPTION value=" + strQuote + a["ID"] + ">" %>
            // This test covers the incomplete-tag form, which must pass through unchanged.
            string input = @"<% strHTML = strHTML & ""<OPTION value="" + strQuote + a[""ID""] %>";
            string expected = @"<% strHTML = strHTML & ""<OPTION value="" + strQuote + a[""ID""] %>";

            AssertConverts(input, expected);
        }

        /// <summary>
        /// A simple server expression used as an unquoted attribute value.
        /// </summary>
        /// <remarks>
        /// Agility puts quotes around the server attribute, but regex doesn't. Pick
        /// which you prefer...
        /// </remarks>
        [Test]
        public void TestSelectWithServerAttribute()
        {
            string input = @"<SELECT ID=<%=Server%>><OPTION VALUE=1>1</OPTION></Select>";
            string expected = @"<select id=<%=Server%>><option value=""1"">1</option></select>";

            AssertConverts(input, expected);
        }
    }
}