Class HtmlLoadOptions
Namespace: Aspose.Words.Loading
Assembly: Aspose.Words.dll (26.2.0)
Allows you to specify additional options when loading an HTML document into an Aspose.Words.Document object.
To learn more, visit the Specify Load Options documentation article.
public class HtmlLoadOptions : LoadOptions
Inheritance
object ← LoadOptions ← HtmlLoadOptions
Inherited Members
LoadOptions.Equals(object) , LoadOptions.LoadFormat , LoadOptions.Password , LoadOptions.BaseUri , LoadOptions.Encoding , LoadOptions.ResourceLoadingCallback , LoadOptions.WarningCallback , LoadOptions.ProgressCallback , LoadOptions.PreserveIncludePictureField , LoadOptions.ConvertShapeToOfficeMath , LoadOptions.FontSettings , LoadOptions.TempFolder , LoadOptions.ConvertMetafilesToPng , LoadOptions.MswVersion , LoadOptions.UpdateDirtyFields , LoadOptions.IgnoreOleData , LoadOptions.UseSystemLcid , LoadOptions.LanguagePreferences , LoadOptions.RecoveryMode , object.GetType() , object.MemberwiseClone() , object.ToString() , object.Equals(object?) , object.Equals(object?, object?) , object.ReferenceEquals(object?, object?) , object.GetHashCode()
Examples
Shows how to support conditional comments while loading an HTML document.
HtmlLoadOptions loadOptions = new HtmlLoadOptions();
// If the value is true, then we take VML code into account while parsing the loaded document.
loadOptions.SupportVml = supportVml;
// This document contains a JPEG image within "<!--[if gte vml 1]>" tags,
// and a different PNG image within "<![if !vml]>" tags.
// If we set the "SupportVml" flag to "true", then Aspose.Words will load the JPEG.
// If we set this flag to "false", then Aspose.Words will only load the PNG.
Document doc = new Document(MyDir + "VML conditional.htm", loadOptions);
if (supportVml)
Assert.That(((Shape)doc.GetChild(NodeType.Shape, 0, true)).ImageData.ImageType, Is.EqualTo(ImageType.Jpeg));
else
Assert.That(((Shape)doc.GetChild(NodeType.Shape, 0, true)).ImageData.ImageType, Is.EqualTo(ImageType.Png));
Constructors
HtmlLoadOptions()
Initializes a new instance of this class with default values.
public HtmlLoadOptions()
Examples
Shows how to support conditional comments while loading an HTML document.
HtmlLoadOptions loadOptions = new HtmlLoadOptions();
// If the value is true, then we take VML code into account while parsing the loaded document.
loadOptions.SupportVml = supportVml;
// This document contains a JPEG image within "<!--[if gte vml 1]>" tags,
// and a different PNG image within "<![if !vml]>" tags.
// If we set the "SupportVml" flag to "true", then Aspose.Words will load the JPEG.
// If we set this flag to "false", then Aspose.Words will only load the PNG.
Document doc = new Document(MyDir + "VML conditional.htm", loadOptions);
if (supportVml)
Assert.That(((Shape)doc.GetChild(NodeType.Shape, 0, true)).ImageData.ImageType, Is.EqualTo(ImageType.Jpeg));
else
Assert.That(((Shape)doc.GetChild(NodeType.Shape, 0, true)).ImageData.ImageType, Is.EqualTo(ImageType.Png));
HtmlLoadOptions(string)
A shortcut to initialize a new instance of this class with the specified password to load an encrypted document.
public HtmlLoadOptions(string password)
Parameters
password string
The password to open an encrypted document. Can be null or empty string.
Examples
Shows how to encrypt an HTML document, and then open it using a password.
// Create and sign an encrypted HTML document from an encrypted .docx.
CertificateHolder certificateHolder = CertificateHolder.Create(MyDir + "morzal.pfx", "aw");
SignOptions signOptions = new SignOptions
{
Comments = "Comment",
SignTime = DateTime.Now,
DecryptionPassword = "docPassword"
};
string inputFileName = MyDir + "Encrypted.docx";
string outputFileName = ArtifactsDir + "HtmlLoadOptions.EncryptedHtml.html";
DigitalSignatureUtil.Sign(inputFileName, outputFileName, certificateHolder, signOptions);
// To load and read this document, we will need to pass its decryption
// password using a HtmlLoadOptions object.
HtmlLoadOptions loadOptions = new HtmlLoadOptions("docPassword");
Assert.That(loadOptions.Password, Is.EqualTo(signOptions.DecryptionPassword));
Document doc = new Document(outputFileName, loadOptions);
Assert.That(doc.GetText().Trim(), Is.EqualTo("Test encrypted document."));
HtmlLoadOptions(LoadFormat, string, string)
A shortcut to initialize a new instance of this class with properties set to the specified values.
public HtmlLoadOptions(LoadFormat loadFormat, string password, string baseUri)
Parameters
loadFormat LoadFormat
The format of the document to be loaded.
password string
The password to open an encrypted document. Can be null or empty string.
baseUri string
The string that will be used to resolve relative URIs to absolute. Can be null or empty string.
Examples
Shows how to specify a base URI when opening an HTML document.
// Suppose we want to load an .html document that contains an image linked by a relative URI
// while the image is in a different location. In that case, we will need to resolve the relative URI into an absolute one.
// We can provide a base URI using an HtmlLoadOptions object.
HtmlLoadOptions loadOptions = new HtmlLoadOptions(LoadFormat.Html, "", ImageDir);
Assert.That(loadOptions.LoadFormat, Is.EqualTo(LoadFormat.Html));
Document doc = new Document(MyDir + "Missing image.html", loadOptions);
// While the image was broken in the input .html, our custom base URI helped us repair the link.
Shape imageShape = (Shape)doc.GetChildNodes(NodeType.Shape, true)[0];
Assert.That(imageShape.IsImage, Is.True);
// This output document will display the image that was missing.
doc.Save(ArtifactsDir + "HtmlLoadOptions.BaseUri.docx");
Properties
BlockImportMode
Gets or sets a value that specifies how properties of block-level elements are imported. Default value is Aspose.Words.Loading.BlockImportMode.Merge.
public BlockImportMode BlockImportMode { get; set; }
Property Value
BlockImportMode
Examples
Shows how properties of block-level elements are imported from HTML-based documents.
const string html = @"
<html>
<div style='border:dotted'>
<div style='border:solid'>
<p>paragraph 1</p>
<p>paragraph 2</p>
</div>
</div>
</html>";
MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(html));
HtmlLoadOptions loadOptions = new HtmlLoadOptions();
// Set the mode used to import HTML block-level elements.
loadOptions.BlockImportMode = blockImportMode;
Document doc = new Document(stream, loadOptions);
doc.Save(ArtifactsDir + "HtmlLoadOptions.BlockImport.docx");
ConvertSvgToEmf
Gets or sets a value indicating whether to convert loaded SVG images to the EMF format.
Default value is false and, if possible, loaded SVG images are stored as is without conversion.
public bool ConvertSvgToEmf { get; set; }
Property Value
bool
Examples
Shows how to convert SVG objects to a different format when saving HTML documents.
string html =
@"<html>
<svg xmlns='http://www.w3.org/2000/svg' width='500' height='40' viewBox='0 0 500 40'>
<text x='0' y='35' font-family='Verdana' font-size='35'>Hello world!</text>
</svg>
</html>";
// Use 'ConvertSvgToEmf' to turn back the legacy behavior
// where all SVG images loaded from an HTML document were converted to EMF.
// Now SVG images are loaded without conversion
// if the MS Word version specified in load options supports SVG images natively.
HtmlLoadOptions loadOptions = new HtmlLoadOptions { ConvertSvgToEmf = true };
Document doc = new Document(new MemoryStream(Encoding.UTF8.GetBytes(html)), loadOptions);
// This document contains a <svg> element in the form of text.
// When we save the document to HTML, we can pass a SaveOptions object
// to determine how the saving operation handles this object.
// Setting the "MetafileFormat" property to "HtmlMetafileFormat.Png" to convert it to a PNG image.
// Setting the "MetafileFormat" property to "HtmlMetafileFormat.Svg" to preserve it as an SVG object.
// Setting the "MetafileFormat" property to "HtmlMetafileFormat.EmfOrWmf" to convert it to a metafile.
HtmlSaveOptions options = new HtmlSaveOptions { MetafileFormat = htmlMetafileFormat };
doc.Save(ArtifactsDir + "HtmlSaveOptions.MetafileFormat.html", options);
string outDocContents = File.ReadAllText(ArtifactsDir + "HtmlSaveOptions.MetafileFormat.html");
switch (htmlMetafileFormat)
{
case HtmlMetafileFormat.Png:
Assert.That(outDocContents.Contains(
"<p style=\"margin-top:0pt; margin-bottom:0pt\">" +
"<img src=\"HtmlSaveOptions.MetafileFormat.001.png\" width=\"500\" height=\"40\" alt=\"\" " +
"style=\"-aw-left-pos:0pt; -aw-rel-hpos:column; -aw-rel-vpos:paragraph; -aw-top-pos:0pt; -aw-wrap-type:inline\" />" +
"</p>"), Is.True);
break;
case HtmlMetafileFormat.Svg:
Assert.That(outDocContents.Contains(
"<span style=\"-aw-left-pos:0pt; -aw-rel-hpos:column; -aw-rel-vpos:paragraph; -aw-top-pos:0pt; -aw-wrap-type:inline\">" +
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" width=\"499\" height=\"40\">"), Is.True);
break;
case HtmlMetafileFormat.EmfOrWmf:
Assert.That(outDocContents.Contains(
"<p style=\"margin-top:0pt; margin-bottom:0pt\">" +
"<img src=\"HtmlSaveOptions.MetafileFormat.001.emf\" width=\"500\" height=\"40\" alt=\"\" " +
"style=\"-aw-left-pos:0pt; -aw-rel-hpos:column; -aw-rel-vpos:paragraph; -aw-top-pos:0pt; -aw-wrap-type:inline\" />" +
"</p>"), Is.True);
break;
}
Remarks
Newer versions of MS Word support SVG images natively. If the MS Word version specified in load options supports SVG, Aspose.Words will store SVG images as is without conversion. If SVG is not supported, loaded SVG images will be converted to the EMF format.
If, however, this option is set to true, Aspose.Words will convert loaded SVG images to EMF even if SVG
images are supported by the specified version of MS Word.
IgnoreNoscriptElements
Gets or sets a value indicating whether to ignore <noscript> HTML elements.
Default value is false.
public bool IgnoreNoscriptElements { get; set; }
Property Value
bool
Examples
Shows how to ignore <noscript> HTML elements.
const string html = @"
<html>
<head>
<title>NOSCRIPT</title>
<meta http-equiv=""Content-Type"" content=""text/html; charset=utf-8"">
<script type=""text/javascript"">
alert(""Hello, world!"");
</script>
</head>
<body>
<noscript><p>Your browser does not support JavaScript!</p></noscript>
</body>
</html>";
HtmlLoadOptions htmlLoadOptions = new HtmlLoadOptions();
htmlLoadOptions.IgnoreNoscriptElements = ignoreNoscriptElements;
Document doc = new Document(new MemoryStream(Encoding.UTF8.GetBytes(html)), htmlLoadOptions);
doc.Save(ArtifactsDir + "HtmlLoadOptions.IgnoreNoscriptElements.pdf");
Remarks
Like MS Word, Aspose.Words does not support scripts and by default loads content of <noscript> elements
into the resulting document. In most browsers, however, scripts are supported and content from <noscript>
is not visible. Setting this property to true forces Aspose.Words to ignore all <noscript> elements
and helps to produce documents that look closer to what is seen in browsers.
PreferredControlType
Gets or sets the preferred type of document nodes that will represent imported <input> and <select> elements. Default value is Aspose.Words.Loading.HtmlControlType.FormField.
public HtmlControlType PreferredControlType { get; set; }
Property Value
HtmlControlType
Examples
Shows how to set the preferred type of document nodes that will represent imported <input> and <select> elements.
const string html = @"
<html>
<select name='ComboBox' size='1'>
<option value='val1'>item1</option>
<option value='val2'></option>
</select>
</html>
";
HtmlLoadOptions htmlLoadOptions = new HtmlLoadOptions();
htmlLoadOptions.PreferredControlType = HtmlControlType.StructuredDocumentTag;
Document doc = new Document(new MemoryStream(Encoding.UTF8.GetBytes(html)), htmlLoadOptions);
NodeCollection nodes = doc.GetChildNodes(NodeType.StructuredDocumentTag, true);
StructuredDocumentTag tag = (StructuredDocumentTag) nodes[0];
Remarks
Please note that setting this property does not guarantee that all imported controls will be of the specified type. If an HTML control is not representable with document nodes of the preferred type, Aspose.Words will use a compatible Aspose.Words.Loading.HtmlControlType for that control.
SupportFontFaceRules
Gets or sets a value indicating whether to support @font-face rules and whether to load declared fonts.
Default value is false.
public bool SupportFontFaceRules { get; set; }
Property Value
bool
Examples
Shows how to load declared “@font-face” rules.
HtmlLoadOptions loadOptions = new HtmlLoadOptions();
loadOptions.SupportFontFaceRules = true;
Document doc = new Document(MyDir + "Html with FontFace.html", loadOptions);
Assert.That(doc.FontInfos[0].Name, Is.EqualTo("Squarish Sans CT Regular"));
Remarks
If this option is enabled, fonts declared in @font-face rules are loaded and embedded into the resulting document's
font definitions (see Aspose.Words.DocumentBase.FontInfos). This makes the loaded fonts available for rendering but
doesn't automatically enable embedding of the fonts upon saving. In order to save the document with loaded fonts,
the Aspose.Words.Fonts.FontInfoCollection.EmbedTrueTypeFonts property of the Aspose.Words.DocumentBase.FontInfos
collection should be set to true.
Supported font formats are TTF, EOT, and WOFF.
@font-face rules are not supported when loading SVG images.
SupportVml
Gets or sets a value indicating whether to support VML images.
public bool SupportVml { get; set; }
Property Value
bool
Examples
Shows how to support conditional comments while loading an HTML document.
HtmlLoadOptions loadOptions = new HtmlLoadOptions();
// If the value is true, then we take VML code into account while parsing the loaded document.
loadOptions.SupportVml = supportVml;
// This document contains a JPEG image within "<!--[if gte vml 1]>" tags,
// and a different PNG image within "<![if !vml]>" tags.
// If we set the "SupportVml" flag to "true", then Aspose.Words will load the JPEG.
// If we set this flag to "false", then Aspose.Words will only load the PNG.
Document doc = new Document(MyDir + "VML conditional.htm", loadOptions);
if (supportVml)
Assert.That(((Shape)doc.GetChild(NodeType.Shape, 0, true)).ImageData.ImageType, Is.EqualTo(ImageType.Jpeg));
else
Assert.That(((Shape)doc.GetChild(NodeType.Shape, 0, true)).ImageData.ImageType, Is.EqualTo(ImageType.Png));
WebRequestTimeout
The number of milliseconds to wait before the web request times out. The default value is 100000 milliseconds (100 seconds).
public int WebRequestTimeout { get; set; }
Property Value
int
Examples
Shows how to set a time limit for web requests when loading a document with external resources linked by URLs.
public void WebRequestTimeout()
{
// Create a new HtmlLoadOptions object and verify its timeout threshold for a web request.
HtmlLoadOptions options = new HtmlLoadOptions();
// When loading an Html document with resources externally linked by a web address URL,
// Aspose.Words will abort web requests that fail to fetch the resources within this time limit, in milliseconds.
Assert.That(options.WebRequestTimeout, Is.EqualTo(100000));
// Set a WarningCallback that will record all warnings that occur during loading.
ListDocumentWarnings warningCallback = new ListDocumentWarnings();
options.WarningCallback = warningCallback;
// Load such a document and verify that a shape with image data has been created.
// This linked image will require a web request to load, which will have to complete within our time limit.
string html = $@"
<html>
<img src=""{ImageUrl}"" alt=""Aspose logo"" style=""width:400px;height:400px;"">
</html>
";
// Set an unreasonable timeout limit and try to load the document again.
options.WebRequestTimeout = 0;
Document doc = new Document(new MemoryStream(Encoding.UTF8.GetBytes(html)), options);
Assert.That(warningCallback.Warnings().Count, Is.EqualTo(2));
// A web request that fails to obtain an image within the time limit will still produce an image.
// However, the image will be the red 'x' that commonly signifies missing images.
Shape imageShape = (Shape)doc.GetChild(NodeType.Shape, 0, true);
Assert.That(imageShape.ImageData.ImageBytes.Length, Is.EqualTo(924));
// We can also configure a custom callback to pick up any warnings from timed out web requests.
Assert.That(warningCallback.Warnings()[0].Source, Is.EqualTo(WarningSource.Html));
Assert.That(warningCallback.Warnings()[0].WarningType, Is.EqualTo(WarningType.DataLoss));
Assert.That(warningCallback.Warnings()[0].Description, Is.EqualTo($"Couldn't load a resource from \'{ImageUrl}\'."));
Assert.That(warningCallback.Warnings()[1].Source, Is.EqualTo(WarningSource.Html));
Assert.That(warningCallback.Warnings()[1].WarningType, Is.EqualTo(WarningType.DataLoss));
Assert.That(warningCallback.Warnings()[1].Description, Is.EqualTo("Image has been replaced with a placeholder."));
doc.Save(ArtifactsDir + "HtmlLoadOptions.WebRequestTimeout.docx");
}
/// <summary>
/// Stores all warnings that occur during a document loading operation in a List.
/// </summary>
private class ListDocumentWarnings : IWarningCallback
{
public void Warning(WarningInfo info)
{
mWarnings.Add(info);
}
public List<WarningInfo> Warnings() {
return mWarnings;
}
private readonly List<WarningInfo> mWarnings = new List<WarningInfo>();
}
Remarks
The number of milliseconds that Aspose.Words waits for a response when loading external resources (images, style sheets) linked in HTML and MHTML documents.