Skip to content

Commit c49d438

Browse files
committed
fix: fix template export generating corrupted Excel files
- Change SharedString conversion to use inlineStr format with <is><t> structure
- Add SetCellType method to properly handle cell types (inlineStr for strings, remove t attr for numbers)
- Fix XML element order per ECMA-376 spec (autoFilter before mergeCells/phoneticPr)
- Clean duplicate xmlns declarations from phoneticPr, conditionalFormatting, autoFilter
- Remove empty <v></v> tags that cause invalid XML
- Support <is><t> structure in value node lookup

Fixes template SaveAs generating files that Excel cannot open
1 parent 5cfa15b commit c49d438

1 file changed

Lines changed: 119 additions & 21 deletions

File tree

src/MiniExcel.Core/OpenXml/Templates/OpenXmlTemplate.Impl.cs

Lines changed: 119 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,15 @@ private async Task WriteSheetXmlAsync(Stream outputFileStream, XmlDocument doc,
299299
phoneticPr.ParentNode.RemoveChild(phoneticPr);
300300
}
301301

302+
// Extract autoFilter - must be written before mergeCells and phoneticPr per ECMA-376
303+
var autoFilter = doc.SelectSingleNode("/x:worksheet/x:autoFilter", Ns);
304+
var autoFilterXml = string.Empty;
305+
if (autoFilter is not null)
306+
{
307+
autoFilterXml = autoFilter.OuterXml;
308+
autoFilter.ParentNode.RemoveChild(autoFilter);
309+
}
310+
302311
var contents = doc.InnerXml.Split(new[] { $"<{prefix}sheetData>{{{{{{{{{{{{split}}}}}}}}}}}}</{prefix}sheetData>" }, StringSplitOptions.None);
303312

304313
using var writer = new StreamWriter(outputFileStream, Encoding.UTF8);
@@ -524,6 +533,19 @@ await writer.WriteAsync($"</{prefix}sheetData>"
524533
#endif
525534
).ConfigureAwait(false);
526535

536+
// ECMA-376 element order: sheetData → autoFilter → mergeCells → phoneticPr → conditionalFormatting
537+
538+
// 1. autoFilter (must come before mergeCells)
539+
if (!string.IsNullOrEmpty(autoFilterXml))
540+
{
541+
await writer.WriteAsync(CleanXml(autoFilterXml, endPrefix)
542+
#if NET7_0_OR_GREATER
543+
.AsMemory(), cancellationToken
544+
#endif
545+
).ConfigureAwait(false);
546+
}
547+
548+
// 2. mergeCells
527549
if (_newXMergeCellInfos.Count != 0)
528550
{
529551
await writer.WriteAsync($"<{prefix}mergeCells count=\"{_newXMergeCellInfos.Count}\">"
@@ -546,18 +568,20 @@ await writer.WriteLineAsync($"</{prefix}mergeCells>"
546568
).ConfigureAwait(false);
547569
}
548570

571+
// 3. phoneticPr
549572
if (!string.IsNullOrEmpty(phoneticPrXml))
550573
{
551-
await writer.WriteAsync(phoneticPrXml
574+
await writer.WriteAsync(CleanXml(phoneticPrXml, endPrefix)
552575
#if NET7_0_OR_GREATER
553576
.AsMemory(), cancellationToken
554577
#endif
555578
).ConfigureAwait(false);
556579
}
557580

581+
// 4. conditionalFormatting
558582
if (newConditionalFormatRanges.Count != 0)
559583
{
560-
await writer.WriteAsync(string.Join(string.Empty, newConditionalFormatRanges.Select(cf => cf.Node.OuterXml))
584+
await writer.WriteAsync(CleanXml(string.Join(string.Empty, newConditionalFormatRanges.Select(cf => cf.Node.OuterXml)), endPrefix)
561585
#if NET7_0_OR_GREATER
562586
.AsMemory(), cancellationToken
563587
#endif
@@ -762,6 +786,12 @@ private async Task<GenerateCellValuesContext> GenerateCellValuesAsync(GenerateCe
762786

763787
substXmlRow = rowXml.ToString();
764788
substXmlRow = TemplateRegex.Replace(substXmlRow, MatchDelegate);
789+
790+
// Clean up empty <v> tags, which would otherwise result in invalid XML
791+
substXmlRow = substXmlRow.Replace("<v></v>", "");
792+
substXmlRow = substXmlRow.Replace("<x:v></x:v>", "");
793+
// Also clean up any empty <t> tags, in case an inlineStr cell ended up with no text
794+
// substXmlRow = substXmlRow.Replace("<t></t>", "");
765795
}
766796

767797
rowXml.Clear();
@@ -794,9 +824,13 @@ private async Task<GenerateCellValuesContext> GenerateCellValuesAsync(GenerateCe
794824
var mergeBaseRowIndex = newRowIndex;
795825
newRowIndex += rowInfo.IEnumerableMercell?.Height ?? 1;
796826

827+
// Replace the {{$rowindex}} placeholder in the already-built rowXml with mergeBaseRowIndex
828+
rowXml.Replace("{{$rowindex}}", mergeBaseRowIndex.ToString());
829+
797830
// replace formulas
798831
ProcessFormulas(rowXml, newRowIndex);
799-
await writer.WriteAsync(CleanXml(rowXml, endPrefix).ToString()
832+
var finalXml = CleanXml(rowXml, endPrefix).ToString();
833+
await writer.WriteAsync(finalXml
800834
#if NET7_0_OR_GREATER
801835
.AsMemory(), cancellationToken
802836
#endif
@@ -1040,7 +1074,8 @@ private void ProcessFormulas(StringBuilder rowXml, int rowIndex)
10401074
private static string CleanXml(string xml, string endPrefix) => CleanXml(new StringBuilder(xml), endPrefix).ToString();
10411075
private static StringBuilder CleanXml(StringBuilder xml, string endPrefix) => xml
10421076
.Replace("xmlns:x14ac=\"http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac\"", "")
1043-
.Replace($"xmlns{endPrefix}=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", "");
1077+
.Replace($"xmlns{endPrefix}=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", "")
1078+
.Replace("xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", "");
10441079

10451080
private static void ReplaceSharedStringsToStr(IDictionary<int, string> sharedStrings, XmlNodeList rows)
10461081
{
@@ -1061,10 +1096,67 @@ private static void ReplaceSharedStringsToStr(IDictionary<int, string> sharedStr
10611096
if (sharedStrings is null || !sharedStrings.TryGetValue(int.Parse(v.InnerText), out var shared))
10621097
continue;
10631098

1064-
// change type = str and replace its value
1065-
//TODO: remove sharedstring?
1066-
v.InnerText = shared;
1067-
c.SetAttribute("t", "str");
1099+
// change type = inlineStr and replace its value
1100+
c.RemoveChild(v);
1101+
var isNode = c.OwnerDocument.CreateElement("is", Schemas.SpreadsheetmlXmlns);
1102+
var tNode = c.OwnerDocument.CreateElement("t", Schemas.SpreadsheetmlXmlns);
1103+
tNode.InnerText = shared;
1104+
isNode.AppendChild(tNode);
1105+
c.AppendChild(isNode);
1106+
1107+
c.RemoveAttribute("t");
1108+
c.SetAttribute("t", "inlineStr");
1109+
}
1110+
}
1111+
}
1112+
1113+
private static void SetCellType(XmlElement c, string type)
1114+
{
1115+
if (type == "str") type = "inlineStr"; // Force inlineStr for strings
1116+
1117+
if (type == "inlineStr")
1118+
{
1119+
// Ensure <is><t>...</t></is>
1120+
c.SetAttribute("t", "inlineStr");
1121+
var v = c.SelectSingleNode("x:v", Ns);
1122+
if (v != null)
1123+
{
1124+
var text = v.InnerText;
1125+
c.RemoveChild(v);
1126+
var isNode = c.OwnerDocument.CreateElement("is", Schemas.SpreadsheetmlXmlns);
1127+
var tNode = c.OwnerDocument.CreateElement("t", Schemas.SpreadsheetmlXmlns);
1128+
tNode.InnerText = text;
1129+
isNode.AppendChild(tNode);
1130+
c.AppendChild(isNode);
1131+
}
1132+
else if (c.SelectSingleNode("x:is", Ns) == null)
1133+
{
1134+
// Create empty <is><t></t></is> if neither <v> nor <is> exists
1135+
var isNode = c.OwnerDocument.CreateElement("is", Schemas.SpreadsheetmlXmlns);
1136+
var tNode = c.OwnerDocument.CreateElement("t", Schemas.SpreadsheetmlXmlns);
1137+
isNode.AppendChild(tNode);
1138+
c.AppendChild(isNode);
1139+
}
1140+
}
1141+
else
1142+
{
1143+
// Ensure <v>...</v>
1144+
// For booleans we set t="b"; for every other non-string type we remove the 't' attribute to let it be default (number)
1145+
// or we could set it to 'n' explicitly, but removing is safer for general number types
1146+
if (type == "b")
1147+
c.SetAttribute("t", "b");
1148+
else
1149+
c.RemoveAttribute("t");
1150+
1151+
var isNode = c.SelectSingleNode("x:is", Ns);
1152+
if (isNode != null)
1153+
{
1154+
var tNode = isNode.SelectSingleNode("x:t", Ns);
1155+
var text = tNode?.InnerText;
1156+
c.RemoveChild(isNode);
1157+
var v = c.OwnerDocument.CreateElement("v", Schemas.SpreadsheetmlXmlns);
1158+
v.InnerText = text;
1159+
c.AppendChild(v);
10681160
}
10691161
}
10701162
}
@@ -1117,7 +1209,7 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary<string, object?> inputMap
11171209
c.SetAttribute("r", $"{StringHelper.GetLetters(r)}{{{{$rowindex}}}}");
11181210
}
11191211

1120-
var v = c.SelectSingleNode("x:v", Ns);
1212+
var v = c.SelectSingleNode("x:v", Ns) ?? c.SelectSingleNode("x:is/x:t", Ns);
11211213
if (v?.InnerText is null)
11221214
continue;
11231215

@@ -1240,19 +1332,19 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary<string, object?> inputMap
12401332

12411333
if (isMultiMatch)
12421334
{
1243-
c.SetAttribute("t", "str");
1335+
SetCellType(c, "str");
12441336
}
12451337
else if (TypeHelper.IsNumericType(type) && !type.IsEnum)
12461338
{
1247-
c.SetAttribute("t", "n");
1339+
SetCellType(c, "n");
12481340
}
12491341
else if (Type.GetTypeCode(type) == TypeCode.Boolean)
12501342
{
1251-
c.SetAttribute("t", "b");
1343+
SetCellType(c, "b");
12521344
}
12531345
else if (Type.GetTypeCode(type) == TypeCode.DateTime)
12541346
{
1255-
c.SetAttribute("t", "str");
1347+
SetCellType(c, "str");
12561348
}
12571349

12581350
break;
@@ -1292,36 +1384,36 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary<string, object?> inputMap
12921384

12931385
if (isMultiMatch)
12941386
{
1295-
c.SetAttribute("t", "str");
1387+
SetCellType(c, "str");
12961388
}
12971389
else if (TypeHelper.IsNumericType(type) && !type.IsEnum)
12981390
{
1299-
c.SetAttribute("t", "n");
1391+
SetCellType(c, "n");
13001392
}
13011393
else if (Type.GetTypeCode(type) == TypeCode.Boolean)
13021394
{
1303-
c.SetAttribute("t", "b");
1395+
SetCellType(c, "b");
13041396
}
13051397
else if (Type.GetTypeCode(type) == TypeCode.DateTime)
13061398
{
1307-
c.SetAttribute("t", "str");
1399+
SetCellType(c, "str");
13081400
}
13091401
}
13101402
else
13111403
{
13121404
var cellValueStr = cellValue?.ToString(); // value did encodexml, so don't duplicate encode value (https://gitee.com/dotnetchina/MiniExcel/issues/I4DQUN)
13131405
if (isMultiMatch || cellValue is string) // if matchs count over 1 need to set type=str (https://user-images.githubusercontent.com/12729184/114530109-39d46d00-9c7d-11eb-8f6b-52ad8600aca3.png)
13141406
{
1315-
c.SetAttribute("t", "str");
1407+
SetCellType(c, "str");
13161408
}
13171409
else if (decimal.TryParse(cellValueStr, out var outV))
13181410
{
1319-
c.SetAttribute("t", "n");
1411+
SetCellType(c, "n");
13201412
cellValueStr = outV.ToString(CultureInfo.InvariantCulture);
13211413
}
13221414
else if (cellValue is bool b)
13231415
{
1324-
c.SetAttribute("t", "b");
1416+
SetCellType(c, "b");
13251417
cellValueStr = b ? "1" : "0";
13261418
}
13271419
else if (cellValue is DateTime timestamp)
@@ -1330,6 +1422,12 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary<string, object?> inputMap
13301422
cellValueStr = timestamp.ToString("yyyy-MM-dd HH:mm:ss");
13311423
}
13321424

1425+
if (string.IsNullOrEmpty(cellValueStr) && string.IsNullOrEmpty(c.GetAttribute("t")))
1426+
{
1427+
SetCellType(c, "str");
1428+
v = c.SelectSingleNode("x:v", Ns) ?? c.SelectSingleNode("x:is/x:t", Ns);
1429+
}
1430+
13331431
v.InnerText = v.InnerText.Replace($"{{{{{propNames[0]}}}}}", cellValueStr); //TODO: auto check type and set value
13341432
}
13351433
}
@@ -1396,4 +1494,4 @@ private static bool EvaluateStatement(object tagValue, string comparisonOperator
13961494
_ => false
13971495
};
13981496
}
1399-
}
1497+
}

0 commit comments

Comments
 (0)