Hello every one i am using HTml Agility and Openxml to convert my html content to word file content.
<div>
<div id="container">
<div>
<div>
<!--content starts here//-->
<form name="questions" method="post">
<img src="../../content/0/Static UPload/Divya_3LevelLeftMenu_Operating System v8.0 English/unit9/lesson27/../../images/less_title_27.jpg" width="750" height="75">
<div id="title">Exercise
<table border="0" cellspacing="20" cellpadding="0">
<tr>
<td><b> Student's Name: </b><br>
<input type="text" name="b1" size="45"></td>
<td><b>Class:</b><br>
<input type="text" name="b2" size="45"></td>
</tr>
</table>
<td width="176" align="left"> </td>
<tr><td width="779" align="left"> </td>
</tr>
<ol>
<li>Describe the purpose of Windows Update.
<p align="left"><textarea name="a1" rows="10" wrap="VIRTUAL" cols="55"></textarea></p>
</li>
</ol>
<ol start="2">
<li>Explain why using Windows Update is critical to maintaining an operating system.
<p align="left"><textarea name="a2" rows="10" wrap="VIRTUAL" cols="55"></textarea></p>
</li>
</ol>
<ol start="3">
<li>Summarize the process used to access and install Windows Updates.
<p align="left"><textarea name="a3" rows="10" wrap="VIRTUAL" cols="55"></textarea></p>
</li>
</ol>
<ol start="4">
<li>Compare and contrast using Windows Update and using a Windows Service Pack.
<p align="left"><textarea name="a4" rows="10" wrap="VIRTUAL" cols="55"></textarea></p>
</li>
</ol>
<center><p><b>Note: You must print your completed exercise
to submit to your instructor.</b><br>
<b class="style1"><u>Do Not</u></b> close this window without printing your exercise or your answers will be lost.<br><br>
<input onclick="reLoadMe(document.questions) " type="button" value="Print Preview">
</p>
</center>
</form>
<div align="center"><a href="#top"><img src="../../content/0/Static UPload/Divya_3LevelLeftMenu_Operating System v8.0 English/unit9/lesson27/../../images/back_to_top.jpg" alt="" width="40" height="21" border="0"></a>
</div></div></div></div></div></div>
this is the html content i am using to convert.
But i am getting the following error while parsing it.
at NotesFor.HtmlToOpenXml.TableContext.get_CurrentTable()
at NotesFor.HtmlToOpenXml.HtmlConverter.ProcessTableColumn(HtmlEnumerator en)
at NotesFor.HtmlToOpenXml.HtmlConverter.ProcessHtmlChunks(HtmlEnumerator en, String endTag)
at NotesFor.HtmlToOpenXml.HtmlConverter.Parse(String html)
at WebApplication3.WebForm3.Button1_Click(Object sender, EventArgs e) in C:\Users\USER\Documents\Visual Studio 2008\Projects\Piyush_training\WebApplication3\WebForm3.aspx.cs:line 102
my code is as follows.
using DocumentFormat.OpenXml.Drawing;
using NotesFor.HtmlToOpenXml;
using System.IO;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using wp = DocumentFormat.OpenXml.Drawing.Wordprocessing;
using DocumentFormat.OpenXml;
using HtmlAgilityPack;
using System.Text;
protected void Button1_Click(object sender, EventArgs e)
{
const string filename = "C:/Temp/test.docx";
Response.ContentEncoding = System.Text.Encoding.UTF7;
System.Text.StringBuilder SB = new System.Text.StringBuilder();
System.IO.StringWriter SW = new System.IO.StringWriter();
string pagecontent=above html Content;
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(pagecontent);
if (doc == null) ;
doc.OptionCheckSyntax = true;
doc.OptionAutoCloseOnEnd = true;
doc.OptionFixNestedTags = true;
int errorCount = doc.ParseErrors.Count();
string output = “”;
doc.Save(SW);
System.Web.UI.HtmlTextWriter htmlTW = new System.Web.UI.HtmlTextWriter(SW);
strBody = "<html>" + "<body>" + "<div><b>" + htmlTW.InnerWriter.ToString() + "</b></div>" + "</body>" + "</html>";
string html = strBody;
try
{
using (MemoryStream generatedDocument = new MemoryStream())
{
using (WordprocessingDocument package = WordprocessingDocument.Create(generatedDocument, WordprocessingDocumentType.Document))
{
MainDocumentPart mainPart = package.MainDocumentPart;
if (mainPart == null)
{
mainPart = package.AddMainDocumentPart();
new Document(new Body()).Save(mainPart);
}
HtmlConverter converter = new HtmlConverter(mainPart);
converter.ExcludeLinkAnchor = true;
converter.RefreshStyles();
converter.ImageProcessing = ImageProcessing.AutomaticDownload;
Body body = mainPart.Document.Body;
converter.ConsiderDivAsParagraph = false;
var paragraphs = converter.Parse(html);
for (int i = 0; i < paragraphs.Count; i++)
{
body.Append(paragraphs[i]);
}
mainPart.Document.Save();
}
File.WriteAllBytes(filename, generatedDocument.ToArray());
}
System.Diagnostics.Process.Start(filename);
}
catch (Exception ex)
{
Response.Write(ex.ToString());
}
}
Use this to get content with images working.
To use the AltChunk method you have to use an existent file. Create the file dynamically with any content first, because altChunk doesn’t accept a blank file.