-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathmod_HTMLXML.bas
88 lines (79 loc) · 3.13 KB
/
mod_HTMLXML.bas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
Attribute VB_Name = "mod_HTMLXML"
Option Explicit
Public Function FilterHTML(ByVal RawHTML As String) As String
If Len(RawHTML) = 0 Then Exit Function
Dim HTMLEntities As Variant, HTMLReplacements As Variant, Index As Long
Const REG_HTMLTAGS = "(<[\w\s""':.=-]*>|<\/[\w\s""':.=-]*>)"
HTMLEntities = Array(" ", "<", ">", "&", """, "'")
HTMLReplacements = Array(" ", "<", ">", "&", """", "'")
'Parse HTML Entities into plaintext
For Index = 0 To UBound(HTMLEntities)
RawHTML = Replace(RawHTML, HTMLEntities(Index), HTMLReplacements(Index))
Next Index
'Remove any stray HTML tags
Dim TargetTags() As String: TargetTags = RegExer(RawHTML, REG_HTMLTAGS)
'Preemptively remove new line characters with actual new lines to separate any conjoined lines.
RawHTML = Replace(RawHTML, "<b>", " ")
For Index = 0 To UBound(TargetTags)
RawHTML = Replace(RawHTML, TargetTags(Index), "")
Next Index
FilterHTML = RawHTML
End Function
Public Function GetHTTP(ByVal URL As String) As String
On Error Resume Next
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", URL, False
.Send
GetHTTP = .ResponseText
End With
End Function
Public Function GetXML(ByVal URL As String) As String
On Error Resume Next
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.Send
GetXML = .ResponseText
End With
End Function
Public Function GetHTML(ByVal URL As String) As String
On Error GoTo ErrorHandler
Dim IE As Object: Set IE = CreateObject("InternetExplorer.Application")
Dim HTML As Object
IE.navigate URL
Do Until IE.ReadyState = 4: DoEvents: Loop
Do While IE.Busy: DoEvents: Loop
Set HTML = IE.Document.Body
GetHTML = HTML.innerHTML
IE.Quit
ErrorHandler:
Set IE = Nothing
Set HTML = Nothing
End Function
Public Function CheckHTTP(ByVal URL As String) As Integer
On Error GoTo ErrorHandler
Dim URLReq As Object: Set URLReq = CreateObject("WinHttp.WinHttpRequest.5.1")
If UCase(Left(URL, 4)) <> "HTTP" Then URL = "HTTP://" & URL
URLReq.Open "GET", URL
URLReq.Send
CheckHTTP = CInt(URLReq.Status)
ErrorHandler:
Set URLReq = Nothing
End Function
Public Function DownloadFileHTTP(ByVal SourceURL As String, ByVal LocalFile As String, Optional ByVal Username As String, Optional ByRef Password As String) As Boolean
On Error GoTo ErrorHandler
If Len(SourceURL) = 0 Or Len(LocalFile) = 0 Then Exit Function
Dim WHTTPReq As Object: Set WHTTPReq = CreateObject("Microsoft.XMLHTTP")
WHTTPReq.Open "GET", SourceURL, False, Username, Password
WHTTPReq.Send
If Not WHTTPReq.Status = 200 Then Exit Function
With CreateObject("ADODB.Stream")
.Open
.Type = 1
.Write WHTTPReq.responseBody
.SaveToFile LocalFile, 2 ' 1 = no overwrite, 2 = overwrite
.Close
End With
DownloadFileHTTP = CreateObject("Scripting.FileSystemObject").FileExists(LocalFile)
ErrorHandler:
Set WHTTPReq = Nothing
End Function