Convert HTML to XHTML
Tests the regex-based version, showing some of its features (and oddities).
using NUnit.Framework;
using System;
namespace XHtmlConvertor
{
/// <summary>
/// NUnit fixture exercising the regex-based HTML conversion (<c>ConvertWithRegex</c>).
/// </summary>
/// <remarks>
/// Most of these cases fail with the Agility-based version because of slight differences:
/// some are minor formatting, but some are corner cases
/// (both versions will occasionally change content INSIDE server tags).
/// </remarks>
[TestFixture]
public class TestConvertWithRegex
{
    /// <summary>
    /// Loads <paramref name="input"/> into a fresh <c>ConvertWithRegex</c>, saves it back
    /// to a string and asserts that the result equals <paramref name="expected"/>.
    /// </summary>
    /// <param name="input">The HTML handed to <c>LoadHtml</c>.</param>
    /// <param name="expected">The exact text <c>SaveToString</c> is expected to return.</param>
    private static void AssertConverts(string input, string expected)
    {
        ConvertWithRegex convertor = new ConvertWithRegex();
        convertor.LoadHtml(input);
        string actual = convertor.SaveToString();
        Assert.AreEqual(expected, actual);
    }

    /// <summary>
    /// Element names are lowercased and an unclosed br is self-closed.
    /// </summary>
    /// <remarks>Fails with Agility (Agility closes tags with a space followed by /&gt;).</remarks>
    [Test]
    public void TestBr()
    {
        string html = @"<P><BR><br/></P>";
        string xhtml = @"<p><br/><br/></p>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// A tag whose attributes continue on a second line.
    /// </summary>
    /// <remarks>
    /// The regex version preserves the spaces and line breaks;
    /// Agility removes them and closes the div.
    /// </remarks>
    [Test]
    public void TestMultiLine()
    {
        // The continuation lines of these verbatim strings start at column 0 on purpose:
        // any leading whitespace would become part of the string.
        string html = @"<DIV
ID=""MYID"" CLASS = ""UGLY"">";
        string xhtml = @"<div
id=""MYID"" class = ""UGLY"">";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// Three kinds of attribute: single/double quoted, and one with no value at all.
    /// </summary>
    /// <remarks>Agility turns single quotes into double quotes.</remarks>
    [Test]
    public void TestAttributes()
    {
        string html = @"<INPUT VALUE='1' ID=""A"" READONLY TYPE=""text"">";
        string xhtml = @"<input value='1' id=""A"" readonly=""readonly"" type=""text""/>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// A doctype is inserted in front of a full html document.
    /// </summary>
    [Test]
    public void TestDocType()
    {
        string html = @"<HTML><HEAD></HEAD><BODY bgColor=#eeeeee><H1>HELLO</H1></BODY></HTML>";

        // Expected output: doctype line, a CR/LF, then the converted document.
        string doctype = @"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 Transitional//EN"" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"">";
        string document = @"<html xmlns=""http://www.w3.org/1999/xhtml""><head></head><body bgcolor=""#eeeeee""><h1>HELLO</h1></body></html>";
        string xhtml = doctype + "\r\n" + document;

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// The language attribute is removed from a script tag.
    /// </summary>
    [Test]
    public void TestScriptLanguage()
    {
        // NOTE(review): these are verbatim strings, so each \n below is a literal backslash
        // followed by 'n', not a line break - confirm that this is what was intended.
        string html = @"<SCRIPT LANGUAGE=""JAVASCRIPT""><!--\nalert(""Here"");\n//--></SCRIPT>";
        string xhtml = @"<script><!--\nalert(""Here"");\n//--></script>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// Html tags are changed, asp controls are left alone.
    /// </summary>
    /// <remarks>Agility alters the case of asp controls and adds a closing element.</remarks>
    [Test]
    public void TestAspControl()
    {
        string html = @"<P><asp:TextBox Runat=""Server"" Value=""Test""/></P>";
        string xhtml = @"<p><asp:TextBox Runat=""Server"" Value=""Test""/></p>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// An input with server blocks both inside an attribute value and in attribute position.
    /// </summary>
    /// <remarks>Agility lowercases MORESERVER STUFF.</remarks>
    [Test]
    public void TestInputWithServer()
    {
        string html = @"<INPUT VALUE=""<%=UPPERCASE RUNNING=SERVER%>"" <%=MORESERVER STUFF%>>";
        string xhtml = @"<input value=""<%=UPPERCASE RUNNING=SERVER%>"" <%=MORESERVER STUFF%>/>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// Some &lt;&gt; within a server block that looks like a tag; expected to come back unchanged.
    /// </summary>
    [Test]
    public void TestTagLikeInServer()
    {
        // Input and expected output are the same text.
        string unchanged = @"<% if(A<B && B>A)Response.Write('OK')%>";

        AssertConverts(unchanged, unchanged);
    }

    /// <summary>
    /// A complete tag written from within a server block (corner case: should this be allowed?).
    /// </summary>
    /// <remarks>Agility doesn't lowercase the img, but the regex version does.</remarks>
    [Test]
    public void TestTagInServer()
    {
        string html = @"<P><% if(HasImage) Response.Write(""<IMG SRC='MyImg.gif'>"")%></P>";
        string xhtml = @"<p><% if(HasImage) Response.Write(""<img src='MyImg.gif'/>"")%></p>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// Content within a server block is (generally) not touched.
    /// </summary>
    [Test]
    public void TestDontTouchServer()
    {
        // Note that the following variant (with the closing "">"" appended) WILL be converted:
        //   @"<% strHTML = strHTML & ""<OPTION value="" + strQuote + a[""ID""] + "">"" %>"
        // The case asserted here has no closing ">" inside the server block and is
        // expected to come back unchanged.
        string unchanged = @"<% strHTML = strHTML & ""<OPTION value="" + strQuote + a[""ID""] %>";

        AssertConverts(unchanged, unchanged);
    }

    /// <summary>
    /// A simple server block used as an unquoted attribute value.
    /// </summary>
    /// <remarks>
    /// Agility puts quotes around the server attribute, but the regex version doesn't.
    /// Pick whichever you prefer...
    /// </remarks>
    [Test]
    public void TestSelectWithServerAttribute()
    {
        string html = @"<SELECT ID=<%=Server%>><OPTION VALUE=1>1</OPTION></Select>";
        string xhtml = @"<select id=<%=Server%>><option value=""1"">1</option></select>";

        AssertConverts(html, xhtml);
    }
}
}
using System;
namespace XHtmlConvertor
{
/// <summary>
/// NUnit fixture exercising the regex-based HTML conversion (<c>ConvertWithRegex</c>).
/// </summary>
/// <remarks>
/// Most of these cases fail with the Agility-based version because of slight differences:
/// some are minor formatting, but some are corner cases
/// (both versions will occasionally change content INSIDE server tags).
/// </remarks>
[TestFixture]
public class TestConvertWithRegex
{
    /// <summary>
    /// Loads <paramref name="input"/> into a fresh <c>ConvertWithRegex</c>, saves it back
    /// to a string and asserts that the result equals <paramref name="expected"/>.
    /// </summary>
    /// <param name="input">The HTML handed to <c>LoadHtml</c>.</param>
    /// <param name="expected">The exact text <c>SaveToString</c> is expected to return.</param>
    private static void AssertConverts(string input, string expected)
    {
        ConvertWithRegex convertor = new ConvertWithRegex();
        convertor.LoadHtml(input);
        string actual = convertor.SaveToString();
        Assert.AreEqual(expected, actual);
    }

    /// <summary>
    /// Element names are lowercased and an unclosed br is self-closed.
    /// </summary>
    /// <remarks>Fails with Agility (Agility closes tags with a space followed by /&gt;).</remarks>
    [Test]
    public void TestBr()
    {
        string html = @"<P><BR><br/></P>";
        string xhtml = @"<p><br/><br/></p>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// A tag whose attributes continue on a second line.
    /// </summary>
    /// <remarks>
    /// The regex version preserves the spaces and line breaks;
    /// Agility removes them and closes the div.
    /// </remarks>
    [Test]
    public void TestMultiLine()
    {
        // The continuation lines of these verbatim strings start at column 0 on purpose:
        // any leading whitespace would become part of the string.
        string html = @"<DIV
ID=""MYID"" CLASS = ""UGLY"">";
        string xhtml = @"<div
id=""MYID"" class = ""UGLY"">";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// Three kinds of attribute: single/double quoted, and one with no value at all.
    /// </summary>
    /// <remarks>Agility turns single quotes into double quotes.</remarks>
    [Test]
    public void TestAttributes()
    {
        string html = @"<INPUT VALUE='1' ID=""A"" READONLY TYPE=""text"">";
        string xhtml = @"<input value='1' id=""A"" readonly=""readonly"" type=""text""/>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// A doctype is inserted in front of a full html document.
    /// </summary>
    [Test]
    public void TestDocType()
    {
        string html = @"<HTML><HEAD></HEAD><BODY bgColor=#eeeeee><H1>HELLO</H1></BODY></HTML>";

        // Expected output: doctype line, a CR/LF, then the converted document.
        string doctype = @"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 Transitional//EN"" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"">";
        string document = @"<html xmlns=""http://www.w3.org/1999/xhtml""><head></head><body bgcolor=""#eeeeee""><h1>HELLO</h1></body></html>";
        string xhtml = doctype + "\r\n" + document;

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// The language attribute is removed from a script tag.
    /// </summary>
    [Test]
    public void TestScriptLanguage()
    {
        // NOTE(review): these are verbatim strings, so each \n below is a literal backslash
        // followed by 'n', not a line break - confirm that this is what was intended.
        string html = @"<SCRIPT LANGUAGE=""JAVASCRIPT""><!--\nalert(""Here"");\n//--></SCRIPT>";
        string xhtml = @"<script><!--\nalert(""Here"");\n//--></script>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// Html tags are changed, asp controls are left alone.
    /// </summary>
    /// <remarks>Agility alters the case of asp controls and adds a closing element.</remarks>
    [Test]
    public void TestAspControl()
    {
        string html = @"<P><asp:TextBox Runat=""Server"" Value=""Test""/></P>";
        string xhtml = @"<p><asp:TextBox Runat=""Server"" Value=""Test""/></p>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// An input with server blocks both inside an attribute value and in attribute position.
    /// </summary>
    /// <remarks>Agility lowercases MORESERVER STUFF.</remarks>
    [Test]
    public void TestInputWithServer()
    {
        string html = @"<INPUT VALUE=""<%=UPPERCASE RUNNING=SERVER%>"" <%=MORESERVER STUFF%>>";
        string xhtml = @"<input value=""<%=UPPERCASE RUNNING=SERVER%>"" <%=MORESERVER STUFF%>/>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// Some &lt;&gt; within a server block that looks like a tag; expected to come back unchanged.
    /// </summary>
    [Test]
    public void TestTagLikeInServer()
    {
        // Input and expected output are the same text.
        string unchanged = @"<% if(A<B && B>A)Response.Write('OK')%>";

        AssertConverts(unchanged, unchanged);
    }

    /// <summary>
    /// A complete tag written from within a server block (corner case: should this be allowed?).
    /// </summary>
    /// <remarks>Agility doesn't lowercase the img, but the regex version does.</remarks>
    [Test]
    public void TestTagInServer()
    {
        string html = @"<P><% if(HasImage) Response.Write(""<IMG SRC='MyImg.gif'>"")%></P>";
        string xhtml = @"<p><% if(HasImage) Response.Write(""<img src='MyImg.gif'/>"")%></p>";

        AssertConverts(html, xhtml);
    }

    /// <summary>
    /// Content within a server block is (generally) not touched.
    /// </summary>
    [Test]
    public void TestDontTouchServer()
    {
        // Note that the following variant (with the closing "">"" appended) WILL be converted:
        //   @"<% strHTML = strHTML & ""<OPTION value="" + strQuote + a[""ID""] + "">"" %>"
        // The case asserted here has no closing ">" inside the server block and is
        // expected to come back unchanged.
        string unchanged = @"<% strHTML = strHTML & ""<OPTION value="" + strQuote + a[""ID""] %>";

        AssertConverts(unchanged, unchanged);
    }

    /// <summary>
    /// A simple server block used as an unquoted attribute value.
    /// </summary>
    /// <remarks>
    /// Agility puts quotes around the server attribute, but the regex version doesn't.
    /// Pick whichever you prefer...
    /// </remarks>
    [Test]
    public void TestSelectWithServerAttribute()
    {
        string html = @"<SELECT ID=<%=Server%>><OPTION VALUE=1>1</OPTION></Select>";
        string xhtml = @"<select id=<%=Server%>><option value=""1"">1</option></select>";

        AssertConverts(html, xhtml);
    }
}
}