# C_language_and_applications/web_clone.py

'''
Program: webClone.py (Report comments/bugs to chikh@yuntech.edu.tw)
Function: 使用curl下載指定網址的檔案
'''

import os
import subprocess
import webbrowser

from PyQt5 import QtGui
from PyQt5.QtWidgets import *


class DownloadWebData(QWidget):
    """Window that downloads a web page and the files it references.

    The user enters a URL. The page is fetched into the current working
    directory with the external ``curl`` program, its text is scanned for
    ``href`` and ``src=`` attributes, and every referenced file that lives
    under the page's own URL prefix is downloaded as well. Finally the
    saved page is rewritten so that absolute links under that prefix
    become relative.

    NOTE: all downloads run synchronously inside the button handler, so
    the window does not process events while curl is running.
    """

    def __init__(self):
        """Build the window: a URL input field above a start button."""
        super().__init__()
        self.setWindowTitle("下載網頁所用檔案程式")
        self.resize(500, 50)
        self.lineEdit = QLineEdit(self)
        self.pushButton = QPushButton(self)
        self.pushButton.setText("開始下載")
        font = QtGui.QFont()
        font.setFamily("微軟正黑體")
        font.setPointSize(11)
        self.lineEdit.setFont(font)
        self.pushButton.setFont(font)
        layout = QVBoxLayout()
        layout.addWidget(self.lineEdit)
        layout.addWidget(self.pushButton)
        self.setLayout(layout)
        # Pressing Enter in the input field triggers the same handler as
        # clicking the button (https://bit.ly/3BxzOTy).
        self.lineEdit.returnPressed.connect(self.btnClicked)
        self.pushButton.clicked.connect(self.btnClicked)

    def btnClicked(self):
        """Download the page named in the input field and its resources.

        Shows a warning when the field is blank or when the page file is
        not present after the download attempt. The button is disabled
        while working and is always re-enabled afterwards.
        """
        # Surrounding whitespace used to be dropped by the shell; strip it
        # explicitly now that the URL is passed to curl as a single argument.
        subjectURL = self.lineEdit.text().strip()
        if not subjectURL:
            QMessageBox.warning(self, "運作結果", "<font size = 5>網址空白，請輸入有效網址</font>", QMessageBox.Yes)
            return
        self.pushButton.setEnabled(False)
        try:
            # Text after the last "/" is the name curl -O saves the page under.
            fileName = subjectURL.split("/")[-1]
            self._runCurl(subjectURL, "-O", "-J", "-s")
            if not os.path.isfile(fileName):
                # Download failed, or the URL has no file part to save under.
                QMessageBox.warning(self, "運作結果", "<font size = 5>下載失敗，請確認網址是否正確</font>",
                                    QMessageBox.Yes)
                return
            self.parseHTMLfile(subjectURL[:subjectURL.rfind("/") + 1], fileName)
            if QMessageBox.question(self, "運作結果", "<font size = 5>複製完成，檢視%s？</font>" % fileName,
                                    QMessageBox.Yes | QMessageBox.No) == QMessageBox.Yes:
                self._openFile(fileName)
            self.lineEdit.clear()
        finally:
            # Re-enable the button on every exit path, including errors.
            self.pushButton.setEnabled(True)

    def parseHTMLfile(self, mainURL, fileName):
        """Download the resources of a saved page and localize its links.

        Parameters:
            mainURL: URL prefix of the page, up to and including the last "/".
            fileName: path of the already-downloaded page file.

        The file is read as UTF-8 with undecodable bytes ignored, and is
        written back as UTF-8 with every occurrence of ``mainURL`` removed.
        """
        with open(fileName, "r", encoding="utf-8", errors='ignore') as inputFile:
            fileContents = inputFile.read()
        fileSize = len(fileContents)
        # Fetch whatever follows each "href" and each "src=" occurrence.
        self.searchTarget(mainURL, fileContents, fileSize, "href")
        self.searchTarget(mainURL, fileContents, fileSize, "src=")
        with open(fileName, "w", encoding="utf-8") as outputFile:
            outputFile.write(fileContents.replace(mainURL, ""))

    def searchTarget(self, mainURL, fileContents, fileSize, keyword):
        """Download the files named after each occurrence of ``keyword``.

        Parameters:
            mainURL: URL prefix of the page, up to and including the last "/".
            fileContents: full text of the page.
            fileSize: length of ``fileContents``; upper bound for searches.
            keyword: attribute marker to look for ("href" or "src=").

        The value is taken to be the text between the next two double
        quotes after the keyword. Values without "/" or containing "mailto"
        are ignored. Relative values are fetched from ``mainURL + value``;
        absolute http(s) values are fetched only when they start with
        ``mainURL``. Files are stored at the path that remains after
        removing ``mainURL``, which matches the link rewriting done by
        ``parseHTMLfile``.
        """
        i = fileContents.find(keyword)
        while i != -1:  # find() returns -1 when absent; index 0 is a valid hit
            i = fileContents.find('"', i, fileSize)
            j = fileContents.find('"', i + 1, fileSize) if i != -1 else -1
            if j == -1:
                # Unterminated value: stop, otherwise the next search would
                # restart from offset 0 and loop forever.
                break
            filePath = fileContents[i + 1:j]
            i = fileContents.find(keyword, j + 1, fileSize)

            if "/" not in filePath or "mailto" in filePath:
                continue
            if filePath.startswith(("http://", "https://")):
                if not (mainURL and filePath.startswith(mainURL)):
                    continue  # resource outside the page's own URL prefix
                remoteURL = filePath
                localPath = filePath[len(mainURL):]
            else:
                remoteURL = mainURL + filePath
                localPath = filePath
            # The page content is untrusted: never write outside the
            # current working directory.
            if not localPath or self._escapesWorkingDirectory(localPath):
                continue
            directory = os.path.dirname(localPath)
            if directory:
                try:
                    os.makedirs(directory, exist_ok=True)
                except OSError:
                    continue  # target folder cannot be created; skip file
            print("下載檔案 %s" % remoteURL)
            self._runCurl(remoteURL, "-o", localPath, "-J", "-s")

    def _runCurl(self, *arguments):
        """Run ``curl`` with ``arguments``; return True when it exits with 0.

        The arguments are passed as a list without a shell, so characters
        such as "&" or spaces reach curl unchanged and cannot be
        interpreted as shell commands.
        """
        try:
            return subprocess.run(["curl", *arguments]).returncode == 0
        except OSError:
            # curl is not installed or could not be started.
            return False

    @staticmethod
    def _escapesWorkingDirectory(localPath):
        """Return True when ``localPath`` points outside the current directory."""
        normalized = os.path.normpath(localPath)
        drive, tail = os.path.splitdrive(normalized)
        return (bool(drive)
                or tail.startswith((os.sep, "/"))
                or tail.split(os.sep)[0] == os.pardir)

    def _openFile(self, fileName):
        """Open ``fileName`` with the application the system associates with it."""
        try:
            if hasattr(os, "startfile"):
                # Windows: same effect as the shell's "start" command.
                os.startfile(fileName)
            else:
                webbrowser.open("file://" + os.path.abspath(fileName))
        except OSError as error:
            print("無法開啟檔案 %s: %s" % (fileName, error))


if __name__ == "__main__":
    # Script entry point: create the Qt application object, show the
    # downloader window, then run the Qt event loop.
    application = QApplication([])
    mainWindow = DownloadWebData()
    mainWindow.show()
    application.exec_()