This article mainly discusses how to build a multipart/form-data POST request (using a WebKitFormBoundary-style boundary) to upload a file from VBA, because converting PDF documents through a web service is quite convenient. To find the endpoints for document conversion and status checking (I am not certain the status-checking code is correct), open the browser's developer tools, read the page's JavaScript, and set breakpoints so that the exact request parameters can be inspected. Calling the service through HttpRequest is somewhat unstable: sometimes every response looks correct, yet the file cannot be downloaded. File names in Traditional Chinese convert without issues in the browser, but when HttpRequest is used the returned file address is garbled, and it is unclear where the problem lies.
' Win32 URLDownloadToFile (urlmon.dll, ANSI entry point).
' Downloads the resource at szURL into the local file szFileName.
' Returns 0 on success; any other value is an error code.
' pCaller and lpfnCB are pointer-sized arguments, so they are LongPtr on VBA7
' (required for 64-bit Office, which also needs PtrSafe) and Long on older hosts.
' dwReserved is a 32-bit value and must be Long, not the 16-bit VBA Integer.
#If VBA7 Then
Public Declare PtrSafe Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As LongPtr, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As LongPtr) As Long
#Else
Public Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
#End If
' Converts the PDF whose full path is stored in cell B1 of the active sheet to
' a .docx file by driving the CleverPDF web endpoints:
'   1. upload the file as multipart/form-data (pdf/uploadFiles),
'   2. request the conversion (pdf/doProcess.do),
'   3. poll pdf/fetchStatus up to three times, five seconds apart,
'   4. download the result next to the source file and write the outcome to B3,
'   5. ask the server to remove the generated and the uploaded file.
' Relies on JsonConverter.ParseJson and on Random16, ToBytes, waitsec,
' downloadFile and encodeURI from this module.
Sub PDF2Docx()
    Const REFERER_URL As String = "https://www.cleverpdf.com/cn/pdf-to-word"
    Const USER_AGENT As String = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"

    Dim json As Object
    Dim http As MSXML2.ServerXMLHTTP
    Dim bData() As Byte
    Dim strPath As String
    Dim FILENAME As String
    Dim strURL As String
    Dim strIndex As String
    Dim strFileURL As String
    Dim strStatus As String
    Dim strDownURL As String
    Dim strIndex2 As String
    Dim part As String
    Dim BOUNDARY As String      ' was used without a declaration
    Dim ado As Object
    Dim image As Variant
    Dim i As Integer
    Dim bCon As Boolean
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    If Len(Cells(1, 2)) = 0 Then
        MsgBox "File cannot be empty", vbInformation
        Exit Sub
    End If
    ' The comparison is case-sensitive on purpose: the result name is later
    ' derived with Replace(FILENAME, ".pdf", ".docx"), which is case-sensitive too.
    If Right(Cells(1, 2), 4) <> ".pdf" Then
        MsgBox "Must be a PDF file", vbInformation
        Exit Sub
    End If

    ' From here on the status bar is modified, so make sure it is always reset.
    On Error GoTo Fail

    strPath = Cells(1, 2)
    FILENAME = Mid(strPath, InStrRev(strPath, "\") + 1)
    Set http = New MSXML2.ServerXMLHTTP

    ' ---- 1. Upload -------------------------------------------------------
    strURL = "https://www.cleverpdf.com/pdf/uploadFiles"
    BOUNDARY = "WebKitFormBoundary" & Random16()

    ' An empty "files" field followed by the file part itself.
    part = PDF2Docx_FormField(BOUNDARY, "files", "")
    part = part & "------" & BOUNDARY & vbCrLf
    part = part & "Content-Disposition: form-data; name=""files""; filename=""" & FILENAME & """" & vbCrLf
    ' The header name used to be emitted twice ("Content-Type: Content-Type: ...").
    part = part & "Content-Type: application/pdf" & vbCrLf & vbCrLf

    ' Read the PDF as raw bytes.
    Set ado = CreateObject("ADODB.Stream")
    ado.Type = 1 ' binary
    ado.Open
    ado.LoadFromFile strPath
    ado.Position = 0
    image = ado.read
    ado.Close

    ' Assemble the request body: text header + file bytes + closing boundary.
    ado.Open
    ado.Position = 0
    ado.Type = 1 ' binary
    ado.Write ToBytes(part)
    ado.Write image
    ado.Write ToBytes(vbCrLf & "------" & BOUNDARY & "--")
    ado.Position = 0

    Application.StatusBar = "Uploading file..."

    http.Open "POST", strURL, False
    http.setRequestHeader "Content-Type", "multipart/form-data; boundary=----" & BOUNDARY
    http.setRequestHeader "Referer", REFERER_URL
    http.setRequestHeader "User-Agent", USER_AGENT
    http.send ado.read
    ado.Close
    http.waitForResponse

    If http.Status = 200 Then
        Set json = JsonConverter.ParseJson(http.responseText)
        strIndex = json("index")
        strStatus = json("STATUS")
        ' The "url" value of the response is not used; the path is rebuilt here.
        strFileURL = "uploadFiles/file/" & strIndex & "/" & FILENAME

        ' ---- 2. Request the conversion -----------------------------------
        BOUNDARY = "WebKitFormBoundary" & Random16()
        part = PDF2Docx_FormField(BOUNDARY, "url", strFileURL)
        part = part & PDF2Docx_FormField(BOUNDARY, "index", strIndex)
        part = part & PDF2Docx_FormField(BOUNDARY, "pid", "1")
        part = part & PDF2Docx_FormField(BOUNDARY, "oid", "1")
        part = part & PDF2Docx_FormField(BOUNDARY, "status", strStatus)
        part = part & PDF2Docx_FormField(BOUNDARY, "pwd", "")
        part = part & PDF2Docx_FormField(BOUNDARY, "formatv1", "1")
        part = part & "------" & BOUNDARY & "--" & vbCrLf

        ' NOTE(review): vbFromUnicode encodes with the system ANSI code page, not
        ' UTF-8; non-ASCII file names may reach the server garbled - confirm.
        bData = StrConv(part, vbFromUnicode)

        Application.StatusBar = "Converting file..."

        http.Open "POST", "https://www.cleverpdf.com/pdf/doProcess.do", False
        http.setRequestHeader "Content-Type", "multipart/form-data; boundary=----" & BOUNDARY
        ' Byte count is UBound - LBound + 1; UBound alone was one byte short.
        http.setRequestHeader "Content-Length", CStr(UBound(bData) - LBound(bData) + 1)
        http.setRequestHeader "Referer", REFERER_URL
        http.setRequestHeader "User-Agent", USER_AGENT
        http.send bData
        http.waitForResponse

        waitsec 5
        strIndex2 = ""
        If http.Status = 200 Then
            Set json = JsonConverter.ParseJson(http.responseText)
            strIndex2 = json("index")

            ' ---- 3. Poll the conversion status ---------------------------
            bCon = False
            For i = 1 To 3
                http.Open "GET", "https://www.cleverpdf.com/pdf/fetchStatus?index=" & strIndex2, False
                http.send ""
                http.waitForResponse
                If InStr(1, http.responseText, """result"":0") > 0 Then
                    bCon = True
                    Exit For
                End If
                waitsec 5
            Next i

            ' ---- 4. Download the result ----------------------------------
            If bCon = True Then
                strDownURL = "https://www.cleverpdf.com/" & strIndex2 & "/" & Replace(FILENAME, ".pdf", ".docx")
                Debug.Print strDownURL
                Cells(3, 2) = downloadFile(strDownURL, Left(strPath, Len(strPath) - 4) & ".docx")
            End If
        End If

        ' ---- 5. Server-side cleanup --------------------------------------
        ' Delete generated file
        If Len(strIndex2) > 0 Then
            strDownURL = "https://www.cleverpdf.com/pdf/removeFiles?path=" & strIndex2 & "%5C" & encodeURI(Replace(FILENAME, ".pdf", ".docx"))
            http.Open "GET", strDownURL, False
            http.setRequestHeader "Referer", REFERER_URL
            http.send ""
            http.waitForResponse
            Debug.Print http.responseText
        End If

        ' Delete original file
        http.Open "GET", "https://www.cleverpdf.com/pdf/removeFiles?path=" & Replace(encodeURI(strFileURL), "/", "%2F"), False
        http.send
        http.waitForResponse
        Debug.Print http.responseText
    End If

    Application.StatusBar = False
    Exit Sub

Fail:
    ' Capture the error first, restore the status bar, then surface the
    ' original error unchanged so failures stay as visible as before.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Application.StatusBar = False
    Err.Raise errNum, errSrc, errDesc
End Sub

' Returns one text field of a multipart/form-data body: the boundary line, the
' Content-Disposition header, a blank line, the value and a trailing CRLF.
Private Function PDF2Docx_FormField(ByVal sBoundary As String, ByVal sName As String, ByVal sValue As String) As String
    PDF2Docx_FormField = "------" & sBoundary & vbCrLf & _
                         "Content-Disposition: form-data; name=""" & sName & """" & vbCrLf & vbCrLf & _
                         sValue & vbCrLf
End Function
' Returns a string of 16 characters drawn at random from a-z, A-Z and 0-9.
' Used as the random suffix of a multipart boundary.
Function Random16() As String
    Const CHARACTERS As String = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    Static seeded As Boolean
    Dim result As String
    Dim i As Integer
    Dim randomIndex As Integer

    ' Without Randomize, Rnd replays the same sequence in every new session,
    ' so the "random" suffix would be identical after each restart.
    If Not seeded Then
        Randomize
        seeded = True
    End If

    result = ""
    For i = 1 To 16
        ' Rnd is in [0, 1), so randomIndex is in 1..Len(CHARACTERS).
        randomIndex = Int((Len(CHARACTERS) * Rnd) + 1)
        result = result & Mid(CHARACTERS, randomIndex, 1)
    Next i

    Random16 = result
End Function
' Downloads strDownURL to the local path strLocalURL through URLDownloadToFile
' and returns a human-readable outcome message: the success text when the API
' returns 0, the failure text for any other return value.
Function downloadFile(strDownURL As String, strLocalURL As String) As String
    Const MSG_DONE As String = "Document converted and downloaded"
    Const MSG_FAILED As String = "Error: Conversion failed or document download failed"

    ' URL-encoding of strDownURL is intentionally left disabled:
    'strDownURL = encodeURI(strDownURL)
    Select Case URLDownloadToFile(0, strDownURL, strLocalURL, 0, 0)
        Case 0
            downloadFile = MSG_DONE
        Case Else
            downloadFile = MSG_FAILED
    End Select
End Function
' Waits roughly dS seconds while keeping the application responsive by
' calling DoEvents in a loop. DoEvents is always called at least once.
Private Sub waitsec(ByVal dS As Double)
    Const SECONDS_PER_DAY As Double = 86400#
    Dim startedAt As Double
    Dim elapsed As Double

    ' Timer returns the seconds elapsed since midnight.
    startedAt = Timer
    Do
        DoEvents
        elapsed = Timer - startedAt
        ' Timer restarts from 0 at midnight; without this correction a wait
        ' that spans midnight would never reach dS and loop forever.
        If elapsed < 0 Then elapsed = elapsed + SECONDS_PER_DAY
    Loop While elapsed < dS
End Sub
' Returns strText decoded with JavaScript's decodeURI, evaluated through the
' Microsoft ScriptControl COM object.
' NOTE(review): ScriptControl is presumably unavailable in 64-bit Office - confirm.
Function decodeURI(ByVal strText As String) As String
    Dim JS As Object
    Dim literal As String

    ' strText is embedded in a single-quoted JavaScript string literal.
    ' Backslashes must be escaped first (otherwise "\b", "\n", ... in the input
    ' are read as JavaScript escapes), then quotes and line breaks.
    literal = Replace(strText, "\", "\\")
    literal = Replace(literal, "'", "\'")
    literal = Replace(literal, vbCr, "\r")
    literal = Replace(literal, vbLf, "\n")

    Set JS = CreateObject("ScriptControl")
    JS.Language = "JavaScript"
    decodeURI = JS.Eval("decodeURI('" & literal & "');")
End Function
' Returns strText encoded with JavaScript's encodeURI, evaluated through the
' Microsoft ScriptControl COM object.
' NOTE(review): ScriptControl is presumably unavailable in 64-bit Office - confirm.
Function encodeURI(ByVal strText As String) As String
    Dim JS As Object
    Dim literal As String

    ' strText is embedded in a single-quoted JavaScript string literal.
    ' Backslashes must be escaped first (otherwise "\b", "\n", ... in the input
    ' are read as JavaScript escapes), then quotes and line breaks.
    literal = Replace(strText, "\", "\\")
    literal = Replace(literal, "'", "\'")
    literal = Replace(literal, vbCr, "\r")
    literal = Replace(literal, vbLf, "\n")

    Set JS = CreateObject("ScriptControl")
    JS.Language = "JavaScript"
    encodeURI = JS.Eval("encodeURI('" & literal & "');")
End Function
' Returns the UTF-8 encoding of str as a byte array (no byte-order mark).
' An empty string yields an empty byte array.
' The text is written to an ADODB.Stream in text mode and read back in binary
' mode, which is how the character-to-byte conversion is performed.
Function ToBytes(str As String) As Variant
    Const UTF8_BOM_LENGTH As Long = 3
    Dim ado As Object
    Dim noBytes() As Byte

    If Len(str) = 0 Then
        ' Assigning an empty string produces a zero-length byte array.
        noBytes = vbNullString
        ToBytes = noBytes
        Exit Function
    End If

    Set ado = CreateObject("ADODB.Stream")
    ado.Open
    ado.Type = 2 ' text
    ' "_autodetect" is not a neutral default (it appears to be the Japanese
    ' auto-select alias), which mangles other non-ASCII text such as Chinese
    ' file names. Multipart bodies sent by browsers are UTF-8.
    ado.Charset = "utf-8"
    ado.WriteText str
    ' Type may only be changed at position 0.
    ado.Position = 0
    ado.Type = 1 ' binary
    ' Skip the byte-order mark the stream prepends for UTF-8 text.
    ado.Position = UTF8_BOM_LENGTH
    ToBytes = ado.read
    ado.Close
End Function
Source file: https://www.alipan.com/s/omR9gxBaQtj — this example uses the CleverPDF web interface; there are many other free tools for PDF conversion, such as the Python pdf2docx library. I also recommend StirlingPDF, an excellent open-source PDF tool that has both PC and web versions; its Word conversion relies on LibreOffice, however, and during use I found some missing characters in the output.