Monday, September 04, 2006

Table Normalization in XSLT 2.0

Some time ago I wrote a 1.0 stylesheet that normalizes a table - it's currently available on Dave Pawson's FAQ.

The problem of table normalization is to remove colspans and rowspans from a table, placing a copy of the content in each cell.

This table:


+-----------+-----------+
|     a     |     b     |
|           +-----+-----+
|           |  c  |  d  |
+-----------+-----+     |
|        e        |     |
+-----+-----+-----+     |
|  f  |  g  |  h  |     |
+-----+-----+-----+-----+


Would become this:


+-----+-----+-----+-----+
|  a  |  a  |  b  |  b  |
+-----+-----+-----+-----+
|  a  |  a  |  c  |  d  |
+-----+-----+-----+-----+
|  e  |  e  |  e  |  d  |
+-----+-----+-----+-----+
|  f  |  g  |  h  |  d  |
+-----+-----+-----+-----+


The 1.0 solution I wrote used a neat recursive template that maintained pointers to the previous and current rows to deal with the tricky rowspans (colspans are the easy part). In rewriting the transform in 2.0 I've simplified it a great deal, but sadly not with any new 2.0-only feature, just with a better algorithm. Here's the transform:


<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
exclude-result-prefixes="xs">

<!-- Serialization settings: XHTML output method, indented, UTF-8 encoded,
     and with the XML declaration omitted. -->
<xsl:output method="xhtml" indent="yes"
omit-xml-declaration="yes"
encoding="UTF-8" />

<!-- Pass 1 of the pipeline: a temporary tree built by applying templates in
     "colspan" mode to the child nodes of the context node that is in effect
     for global variables. In that mode each td carrying a colspan attribute
     is replaced by that many single-column copies. -->
<xsl:variable name="table_with_no_colspans">
  <xsl:apply-templates select="node()" mode="colspan"/>
</xsl:variable>

<!-- Pass 2 of the pipeline: the children of the colspan-free temporary tree
     re-processed in "rowspan" mode, which repeats row-spanning cells in each
     row they cover. -->
<xsl:variable name="table_with_no_rowspans">
  <xsl:apply-templates select="$table_with_no_colspans/node()" mode="rowspan"/>
</xsl:variable>

<!-- Entry point: emit the result of the two normalization passes, running it
     through "final" mode so leftover rowspan attributes are removed from td
     elements. -->
<xsl:template match="/">
  <xsl:apply-templates select="$table_with_no_rowspans/node()" mode="final"/>
</xsl:template>

<!-- Identity rule shared by every mode: copies the matched node and recurses
     into its attributes and children while staying in the current mode.

     It matches and selects node() rather than only elements, so text,
     comments and processing instructions that are not handled by a more
     specific template (for example the text inside th or caption) are
     carried through each pass instead of being silently dropped. The more
     specific td, tr and tbody templates still win because a plain name test
     has a higher default priority than node(). -->
<xsl:template match="@*|node()" mode="#all">
  <xsl:copy>
    <xsl:apply-templates select="@*|node()" mode="#current"/>
  </xsl:copy>
</xsl:template>

<!-- "colspan" mode: expand a column-spanning cell into single-column cells.

     A td without a colspan attribute is copied unchanged. A td with colspan
     N is replaced by N td elements, each carrying the original content and
     every original attribute except colspan and width. -->
<xsl:template match="td" mode="colspan">
  <xsl:choose>
    <xsl:when test="not(@colspan)">
      <xsl:copy-of select="."/>
    </xsl:when>
    <xsl:otherwise>
      <!-- Remember the cell: inside the for-each the context item is an
           integer, not the td. -->
      <xsl:variable name="cell" select="." as="element()"/>
      <xsl:for-each select="1 to @colspan">
        <td>
          <xsl:copy-of select="$cell/(@* except (@colspan, @width))"/>
          <xsl:copy-of select="$cell/node()"/>
        </td>
      </xsl:for-each>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!-- "rowspan" mode: start the row-by-row walk over a tbody.

     The first row is copied as it stands, because no earlier row can span
     into it. The second row is then processed with the first row passed as
     previousRow; the tr template continues the walk from there.

     The tbody's own attributes are copied explicitly, since xsl:copy on an
     element does not copy its attributes. -->
<xsl:template match="tbody" mode="rowspan">
  <xsl:copy>
    <xsl:copy-of select="@*"/>
    <xsl:copy-of select="tr[1]"/>
    <xsl:apply-templates select="tr[2]" mode="rowspan">
      <xsl:with-param name="previousRow" select="tr[1]"/>
    </xsl:apply-templates>
  </xsl:copy>
</xsl:template>

<!-- "rowspan" mode: rebuild one row from the already-normalized row above it.

     Parameter previousRow: the normalized form of the preceding row (the
     first row of the tbody for the initial call, afterwards the row built by
     the previous invocation of this template).

     For each cell of previousRow, in order:
       * if that cell still has rowspan greater than 1, it extends into this
         row, so a copy of it is emitted with rowspan decreased by one;
       * otherwise the slot is filled from this row's own td elements, taking
         the one whose position equals one plus the number of preceding
         cells in previousRow that did not extend downwards.

     The rebuilt row is written to the output and then handed to the next
     sibling row as its previousRow. -->
<xsl:template match="tr" mode="rowspan">
  <xsl:param name="previousRow"/>

  <xsl:variable name="thisRow" select="."/>

  <xsl:variable name="filledCells">
    <xsl:for-each select="$previousRow/td">
      <xsl:choose>
        <xsl:when test="@rowspan > 1">
          <xsl:copy>
            <xsl:attribute name="rowspan" select="@rowspan - 1"/>
            <xsl:copy-of select="@* except @rowspan"/>
            <xsl:copy-of select="node()"/>
          </xsl:copy>
        </xsl:when>
        <xsl:otherwise>
          <!-- Position, among this row's own cells, of the cell that belongs
               in the current slot. -->
          <xsl:variable name="slot"
                        select="1 + count(preceding-sibling::td[not(@rowspan) or (@rowspan = 1)])"/>
          <xsl:copy-of select="$thisRow/td[$slot]"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:for-each>
  </xsl:variable>

  <xsl:variable name="rebuiltRow" as="element(tr)">
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <xsl:copy-of select="$filledCells/node()"/>
    </xsl:copy>
  </xsl:variable>

  <xsl:copy-of select="$rebuiltRow"/>

  <xsl:apply-templates select="following-sibling::tr[1]" mode="rowspan">
    <xsl:with-param name="previousRow" select="$rebuiltRow"/>
  </xsl:apply-templates>
</xsl:template>

<!-- "final" mode: copy a td with all of its content and every attribute
     except rowspan. A td that has no rowspan attribute therefore comes out
     as an exact copy. -->
<xsl:template match="td" mode="final">
  <xsl:copy>
    <xsl:copy-of select="@* except @rowspan"/>
    <xsl:copy-of select="node()"/>
  </xsl:copy>
</xsl:template>

</xsl:stylesheet>