Class HTMLDocument

Class HTMLDocument

Namespace: Aspose.Html
Assembly: Aspose.HTML.dll (25.2.0)

An HTMLDocument is the root of the HTML hierarchy and holds the entire content. Besides providing access to the hierarchy, it also provides some convenience methods for accessing certain sets of information from the document.

The following properties have been deprecated in favor of the corresponding ones for the BODY element. In DOM Level 2, the method getElementById is inherited from the Document interface where it was moved to.

See also the Document Object Model (DOM) Level 2 HTML Specification.

[ComVisible(true)]
[DOMObject]
[DOMName("HTMLDocument")]
public class HTMLDocument : Document, INotifyPropertyChanged, IEventTarget, IDisposable, IXPathNSResolver, IDocumentTraversal, IXPathEvaluator, IDocumentEvent, IParentNode, IElementTraversal, INonElementParentNode, IGlobalEventHandlers, IDocumentCSS, IDocumentStyle

Inheritance

object → DOMObject → EventTarget → Node → Document → HTMLDocument

Implements

INotifyPropertyChanged, IEventTarget, IDisposable, IXPathNSResolver, IDocumentTraversal, IXPathEvaluator, IDocumentEvent, IParentNode, IElementTraversal, INonElementParentNode, IGlobalEventHandlers, IDocumentCSS, IDocumentStyle

Inherited Members

Document.Navigate(string), Document.Navigate(Url), Document.Navigate(string, string), Document.Navigate(string, Url), Document.Navigate(Stream, string), Document.Navigate(Stream, Url), Document.Navigate(RequestMessage), Document.CreateElement(string), Document.CreateElementNS(string, string), Document.CreateDocumentFragment(), Document.CreateTextNode(string), Document.CreateComment(string), Document.CreateCDATASection(string), Document.CreateProcessingInstruction(string, string), Document.CreateAttribute(string), Document.CreateAttributeNS(string, string), Document.CreateEntityReference(string), Document.CreateDocumentType(string, string, string, string), Document.GetElementsByTagName(string), Document.GetElementsByTagNameNS(string, string), Document.GetElementById(string), Document.GetElementsByClassName(string), Document.CreateNodeIterator(Node), Document.CreateNodeIterator(Node, long), Document.CreateNodeIterator(Node, long, INodeFilter), Document.CreateTreeWalker(Node), Document.CreateTreeWalker(Node, long), Document.CreateTreeWalker(Node, long, INodeFilter), Document.QuerySelectorAll(string), Document.QuerySelector(string), Document.ImportNode(Node, bool), Document.CreateEvent(string), Document.Write(params string[]), Document.WriteLn(params string[]), Document.CreateExpression(string, IXPathNSResolver), Document.CreateNSResolver(Node), Document.Evaluate(string, Node, IXPathNSResolver, XPathResultType, object), Document.RenderTo(IDevice), Document.Dispose(bool), Document.OwnerDocument, Document.Context, Document.Implementation, Document.Location, Document.DocumentURI, Document.Origin, Document.BaseURI, Document.NodeName, Document.NodeType, Document.CharacterSet, Document.Charset, Document.InputEncoding, Document.ContentType, Document.ReadyState, Document.Doctype, Document.DocumentElement, Document.FirstElementChild, Document.LastElementChild, Document.PreviousElementSibling, Document.NextElementSibling, Document.ChildElementCount, Document.Children, 
Document.XmlStandalone, Document.XmlVersion, Document.StrictErrorChecking, Document.DefaultView, Document.StyleSheets, Document.OnReadyStateChange, Document.OnAbort, Document.OnBlur, Document.OnCancel, Document.OnCanplay, Document.OnCanPlayThrough, Document.OnChange, Document.OnClick, Document.OnCueChange, Document.OnDblClick, Document.OnDurationChange, Document.OnEmptied, Document.OnEnded, Document.OnError, Document.OnFocus, Document.OnInput, Document.OnInvalid, Document.OnKeyDown, Document.OnKeyPress, Document.OnKeyUp, Document.OnLoad, Document.OnLoadedData, Document.OnLoadedMetadata, Document.OnLoadStart, Document.OnMouseDown, Document.OnMouseEnter, Document.OnMouseLeave, Document.OnMouseMove, Document.OnMouseOut, Document.OnMouseOver, Document.OnMouseUp, Document.OnMouseWheel, Document.OnPause, Document.OnPlay, Document.OnPlaying, Document.OnProgress, Document.OnRateChange, Document.OnReset, Document.OnResize, Document.OnScroll, Document.OnSeeked, Document.OnSeeking, Document.OnSelect, Document.OnShow, Document.OnStalled, Document.OnSubmit, Document.OnSuspend, Document.OnTimeUpdate, Document.OnToggle, Document.OnVolumeChange, Document.OnWaiting, Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE, Node.TEXT_NODE, Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE, Node.ENTITY_NODE, Node.PROCESSING_INSTRUCTION_NODE, Node.COMMENT_NODE, Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.DOCUMENT_FRAGMENT_NODE, Node.NOTATION_NODE, Node.HasChildNodes(), Node.Normalize(), Node.CloneNode(), Node.CloneNode(bool), Node.IsEqualNode(Node), Node.IsSameNode(Node), Node.LookupPrefix(string), Node.LookupNamespaceURI(string), Node.IsDefaultNamespace(string), Node.InsertBefore(Node, Node), Node.ReplaceChild(Node, Node), Node.RemoveChild(Node), Node.AppendChild(Node), Node.Dispose(bool), Node.ToString(), Node.NodeType, Node.LocalName, Node.NamespaceURI, Node.Prefix, Node.NodeName, Node.BaseURI, Node.OwnerDocument, Node.ParentNode, Node.ParentElement, Node.ChildNodes, Node.FirstChild, 
Node.LastChild, Node.PreviousSibling, Node.NextSibling, Node.NodeValue, Node.TextContent, EventTarget.AddEventListener(string, DOMEventHandler, bool), EventTarget.AddEventListener(string, IEventListener), EventTarget.AddEventListener(string, IEventListener, bool), EventTarget.RemoveEventListener(string, DOMEventHandler, bool), EventTarget.RemoveEventListener(string, IEventListener), EventTarget.RemoveEventListener(string, IEventListener, bool), EventTarget.DispatchEvent(Event), EventTarget.Dispose(), EventTarget.Dispose(bool), DOMObject.GetPlatformType(), object.GetType(), object.MemberwiseClone(), object.ToString(), object.Equals(object?), object.Equals(object?, object?), object.ReferenceEquals(object?, object?), object.GetHashCode()

Constructors

HTMLDocument()

Initializes a new instance of the Aspose.Html.HTMLDocument class.

public HTMLDocument()

HTMLDocument(Configuration)

Initializes a new instance of the Aspose.Html.HTMLDocument class.

public HTMLDocument(Configuration configuration)

Parameters

configuration Configuration

The environment configuration.

HTMLDocument(Url)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(Aspose.Html.Url) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(Url url)

Parameters

url Url

The document URL.

HTMLDocument(Url, Configuration)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(Aspose.Html.Url) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(Url url, Configuration configuration)

Parameters

url Url

The document URL.

configuration Configuration

The environment configuration.

HTMLDocument(string)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.String) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(string address)

Parameters

address string

The document address. It will be combined with the current directory path to form an absolute URL.

HTMLDocument(string, Configuration)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.String) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(string address, Configuration configuration)

Parameters

address string

The document address. It will be combined with the current directory path to form an absolute URL.

configuration Configuration

The environment configuration.

HTMLDocument(string, string)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.String,System.String) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(string content, string baseUri)

Parameters

content string

The document content.

baseUri string

The base URI of the document. It will be combined with the current directory path to form an absolute URL.

Exceptions

ArgumentNullException

baseUri is null.

HTMLDocument(string, string, Configuration)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.String,System.String) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(string content, string baseUri, Configuration configuration)

Parameters

content string

The document content.

baseUri string

The base URI of the document. It will be combined with the current directory path to form an absolute URL.

configuration Configuration

The environment configuration.

Exceptions

ArgumentNullException

baseUri is null.

HTMLDocument(string, Url)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.String,Aspose.Html.Url) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(string content, Url baseUri)

Parameters

content string

The document content.

baseUri Url

The base URI of the document.

Exceptions

ArgumentNullException

baseUri is null.

HTMLDocument(string, Url, Configuration)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.String,Aspose.Html.Url) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(string content, Url baseUri, Configuration configuration)

Parameters

content string

The document content.

baseUri Url

The base URI of the document.

configuration Configuration

The environment configuration.

Exceptions

ArgumentNullException

baseUri is null.

HTMLDocument(Stream, string)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.IO.Stream,System.String) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security. Document loading starts from the current position in the stream.

public HTMLDocument(Stream content, string baseUri)

Parameters

content Stream

The document content.

baseUri string

The base URI of the document. It will be combined with the current directory path to form an absolute URL.

Exceptions

ArgumentNullException

baseUri is null.

HTMLDocument(Stream, string, Configuration)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.IO.Stream,System.String) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security. Document loading starts from the current position in the stream.

public HTMLDocument(Stream content, string baseUri, Configuration configuration)

Parameters

content Stream

The document content.

baseUri string

The base URI of the document. It will be combined with the current directory path to form an absolute URL.

configuration Configuration

The environment configuration.

Exceptions

ArgumentNullException

baseUri is null.

HTMLDocument(Stream, Url)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.IO.Stream,Aspose.Html.Url) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security. Document loading starts from the current position in the stream.

public HTMLDocument(Stream content, Url baseUri)

Parameters

content Stream

The document content.

baseUri Url

The base URI of the document.

Exceptions

ArgumentNullException

baseUri is null.

HTMLDocument(Stream, Url, Configuration)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(System.IO.Stream,Aspose.Html.Url) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security. Document loading starts from the current position in the stream.

public HTMLDocument(Stream content, Url baseUri, Configuration configuration)

Parameters

content Stream

The document content.

baseUri Url

The base URI of the document.

configuration Configuration

The environment configuration.

Exceptions

ArgumentNullException

baseUri is null.

HTMLDocument(RequestMessage)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(Aspose.Html.Net.RequestMessage) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(RequestMessage request)

Parameters

request RequestMessage

The request message.

HTMLDocument(RequestMessage, Configuration)

Initializes a new instance of the Aspose.Html.HTMLDocument class. The constructor works synchronously: it waits for all external resources (images, scripts, etc.) to load. To load the document asynchronously, use the Aspose.Html.Dom.Document.Navigate(Aspose.Html.Net.RequestMessage) method or one of its overloads. Alternatively, you can disable loading of some external resources by setting the appropriate flags in Aspose.Html.Dom.IBrowsingContext.Security.

public HTMLDocument(RequestMessage request, Configuration configuration)

Parameters

request RequestMessage

The request message.

configuration Configuration

The environment configuration.

Properties

Anchors

A collection of all the anchor (A) elements in a document with a value for the name attribute. For reasons of backward compatibility, the returned set of anchors only contains those anchors created with the name attribute, not those created with the id attribute. Note that in [XHTML 1.0], the name attribute (see section 4.10) has no semantics and is only present for legacy user agents: the id attribute is used instead. Users should prefer the iterator mechanisms provided by [DOM Level 2 Traversal] instead.

[DOMName("anchors")]
public HTMLCollection Anchors { get; }

Property Value

HTMLCollection

Applets

A collection of all the OBJECT elements that include applets and APPLET (deprecated) elements in a document.

[DOMName("applets")]
public HTMLCollection Applets { get; }

Property Value

HTMLCollection

Body

The element that contains the content for the document. In documents with BODY contents, returns the BODY element. In frameset documents, this returns the outermost FRAMESET element.

[DOMName("body")]
public HTMLElement Body { get; set; }

Property Value

HTMLElement

Domain

The domain name of the server that served the document, or null if the server cannot be identified by a domain name.

[DOMNullable]
[DOMName("domain")]
public string Domain { get; }

Property Value

string

Forms

A collection of all the forms of a document.

[DOMName("forms")]
public HTMLCollection Forms { get; }

Property Value

HTMLCollection

Images

A collection of all the IMG elements in a document. The behavior is limited to IMG elements for backwards compatibility. As suggested by [HTML 4.01], to include images, authors may use the OBJECT element or the IMG element. Therefore, it is recommended not to use this attribute to find the images in the document but getElementsByTagName with HTML 4.01 or getElementsByTagNameNS with XHTML 1.0.

[DOMName("images")]
public HTMLCollection Images { get; }

Property Value

HTMLCollection

Links

A collection of all AREA elements and anchor (A) elements in a document with a value for the href attribute.

[DOMName("links")]
public HTMLCollection Links { get; }

Property Value

HTMLCollection

Referrer

Returns the URI [IETF RFC 2396] of the page that linked to this page. The value is an empty string if the user navigated to the page directly (not through a link, but, for example, via a bookmark).

[DOMName("referrer")]
public string Referrer { get; }

Property Value

string

Title

The title of a document as specified by the TITLE element in the head of the document.

[DOMName("title")]
public string Title { get; set; }

Property Value

string

Methods

GetOverrideStyle(Element, string)

This method is used to retrieve the override style declaration for a specified element and a specified pseudo-element.

public ICSSStyleDeclaration GetOverrideStyle(Element elt, string pseudoElt)

Parameters

elt Element

The element whose style is to be modified. This parameter cannot be null.

pseudoElt string

The pseudo-element or null if none.

Returns

ICSSStyleDeclaration

The override style declaration.

RenderTo(IDevice)

This method is used to print the contents of the current document to the specified device.

public override void RenderTo(IDevice device)

Parameters

device IDevice

The user device.

Save(Url)

Saves the document to the local file specified by url. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(Url url)

Parameters

url Url

Local URL to output file.

Exceptions

ArgumentException

Raised if the specified url is not a valid local file URL.

Save(ResourceHandler)

Saves the document content and resources using the Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

public void Save(ResourceHandler resourceHandler)

Parameters

resourceHandler ResourceHandler

The resource handler Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

Save(string)

Saves the document to the local file specified by path. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(string path)

Parameters

path string

Local path to output file.

Exceptions

ArgumentException

Raised if the specified path is not a valid local file path.

Save(string, HTMLSaveFormat)

Saves the document to the local file specified by path. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(string path, HTMLSaveFormat saveFormat)

Parameters

path string

Local path to output file.

saveFormat HTMLSaveFormat

Format in which document is saved.

Exceptions

ArgumentException

Raised if the specified path is not a valid local file path.

Save(Url, HTMLSaveFormat)

Saves the document to the local file specified by url. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(Url url, HTMLSaveFormat saveFormat)

Parameters

url Url

Local URL to output file.

saveFormat HTMLSaveFormat

Format in which document is saved.

Exceptions

ArgumentException

Raised if the specified url is not a valid local file URL.

Save(ResourceHandler, HTMLSaveFormat)

Saves the document content and resources using the Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

public void Save(ResourceHandler resourceHandler, HTMLSaveFormat saveFormat)

Parameters

resourceHandler ResourceHandler

The resource handler Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

saveFormat HTMLSaveFormat

Format in which document is saved.

Save(string, HTMLSaveOptions)

Saves the document to the local file specified by path. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(string path, HTMLSaveOptions saveOptions)

Parameters

path string

Local path to output file.

saveOptions HTMLSaveOptions

HTML save options.

Exceptions

ArgumentException

Raised if the specified path is not a valid local file path.

Save(Url, HTMLSaveOptions)

Saves the document to the local file specified by url. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(Url url, HTMLSaveOptions saveOptions)

Parameters

url Url

Local URL to output file.

saveOptions HTMLSaveOptions

HTML save options.

Exceptions

ArgumentException

Raised if the specified url is not a valid local file URL.

Save(ResourceHandler, HTMLSaveOptions)

Saves the document content and resources using the Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

public void Save(ResourceHandler resourceHandler, HTMLSaveOptions saveOptions)

Parameters

resourceHandler ResourceHandler

The resource handler Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

saveOptions HTMLSaveOptions

HTML save options.

Save(string, MarkdownSaveOptions)

Saves the document to the local file specified by path. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(string path, MarkdownSaveOptions saveOptions)

Parameters

path string

Local path to output file.

saveOptions MarkdownSaveOptions

Markdown save options.

Exceptions

ArgumentException

Raised if the specified path is not a valid local file path.

Save(Url, MarkdownSaveOptions)

Saves the document to the local file specified by url. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(Url url, MarkdownSaveOptions saveOptions)

Parameters

url Url

Local URL to output file.

saveOptions MarkdownSaveOptions

Markdown save options.

Exceptions

ArgumentException

Raised if the specified url is not a valid local file URL.

Save(ResourceHandler, MarkdownSaveOptions)

Saves the document content and resources using the Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

public void Save(ResourceHandler resourceHandler, MarkdownSaveOptions saveOptions)

Parameters

resourceHandler ResourceHandler

The resource handler Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

saveOptions MarkdownSaveOptions

Markdown save options.

Save(string, MHTMLSaveOptions)

Saves the document to the local file specified by path. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(string path, MHTMLSaveOptions saveOptions)

Parameters

path string

Local path to output file.

saveOptions MHTMLSaveOptions

MHTML save options.

Exceptions

ArgumentException

Raised if the specified path is not a valid local file path.

Save(Url, MHTMLSaveOptions)

Saves the document to the local file specified by url. All resources used in this document will be saved into an adjacent folder, whose name is constructed as: output_file_name + “_files”.

public void Save(Url url, MHTMLSaveOptions saveOptions)

Parameters

url Url

Local URL to output file.

saveOptions MHTMLSaveOptions

MHTML save options.

Exceptions

ArgumentException

Raised if the specified url is not a valid local file URL.

Save(ResourceHandler, MHTMLSaveOptions)

Saves the document content and resources using the Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

public void Save(ResourceHandler resourceHandler, MHTMLSaveOptions saveOptions)

Parameters

resourceHandler ResourceHandler

The resource handler Aspose.Html.Saving.ResourceHandlers.ResourceHandler.

saveOptions MHTMLSaveOptions

MHTML save options.