"""
Program: webClone.py (Report comments/bugs to chikh@yuntech.edu.tw)
Function: Download the file at a given URL with curl, then download the
          resources it references so the page can be viewed locally.
"""
import os
import subprocess
import webbrowser
from pathlib import Path

from PyQt5.QtWidgets import *
from PyQt5 import QtGui


class DownloadWebData(QWidget):
    """Window with a URL field and a button that clones a web page.

    The page is fetched with the external ``curl`` program into the current
    working directory; every ``href`` / ``src=`` target found in it is then
    fetched as well, and same-site absolute links in the page are rewritten
    to relative ones.
    """

    def __init__(self):
        """Build the window: one line edit for the URL and one start button."""
        super().__init__()
        self.setWindowTitle("下載網頁所用檔案程式")
        self.resize(500, 50)

        self.lineEdit = QLineEdit(self)
        self.pushButton = QPushButton(self)
        self.pushButton.setText("開始下載")

        font = QtGui.QFont()
        font.setFamily("微軟正黑體")
        font.setPointSize(11)
        self.lineEdit.setFont(font)
        self.pushButton.setFont(font)

        layout = QVBoxLayout()
        layout.addWidget(self.lineEdit)
        layout.addWidget(self.pushButton)
        self.setLayout(layout)

        # Pressing Enter in the field behaves like clicking the button.
        # See https://bit.ly/3BxzOTy
        self.lineEdit.returnPressed.connect(self.btnClicked)
        self.pushButton.clicked.connect(self.btnClicked)

    def btnClicked(self):
        """Download the URL typed by the user and offer to open the result.

        Shows a warning and returns when the field is empty. The button is
        disabled while the download runs and is always re-enabled afterwards,
        even when the download fails.
        """
        subjectURL = self.lineEdit.text().strip()
        if not subjectURL:
            QMessageBox.warning(self, "運作結果", "網址空白，請輸入有效網址",
                                QMessageBox.Yes)
            return

        self.pushButton.setEnabled(False)
        try:
            # Equivalent to subjectURL[subjectURL.rfind("/") + 1:].
            # NOTE: a query string ("?...") is kept as part of fileName; such
            # URLs are not handled specially.
            fileName = subjectURL.split("/")[-1]
            mainURL = subjectURL[:subjectURL.rfind("/") + 1]

            # Argument list instead of a shell string: the URL is passed to
            # curl verbatim, so "&", "|", spaces or quotes in it can neither
            # break the command nor inject another one.
            subprocess.run(["curl", subjectURL, "-O", "-J", "-s"], check=False)
            self.parseHTMLfile(mainURL, fileName)

            answer = QMessageBox.question(
                self, "運作結果", "複製完成，檢視%s？" % fileName,
                QMessageBox.Yes | QMessageBox.No)
            if answer == QMessageBox.Yes:
                self._openLocally(fileName)
            self.lineEdit.clear()
        except OSError as err:
            # Raised when curl is not installed, or when the page was not
            # saved (download failed, or the URL has no file-name part).
            QMessageBox.warning(self, "運作結果", "作業失敗：%s" % err,
                                QMessageBox.Yes)
        finally:
            self.pushButton.setEnabled(True)

    def parseHTMLfile(self, mainURL, fileName):
        """Fetch the resources referenced by ``fileName`` and localize links.

        Args:
            mainURL: URL prefix of the page, up to and including the last "/".
            fileName: Local name of the downloaded page.

        Raises:
            OSError: If ``fileName`` cannot be read or written.
        """
        # Undecodable bytes are dropped rather than raising; see
        # https://stackoverflow.com/questions/30700166/python-open-file-error
        with open(fileName, "r", encoding="utf-8", errors='ignore') as inputFile:
            fileContents = inputFile.read()
        fileSize = len(fileContents)

        # Download (creating folders as needed) whatever follows each keyword.
        self.searchTarget(mainURL, fileContents, fileSize, "href")
        self.searchTarget(mainURL, fileContents, fileSize, "src=")

        # Turn same-site absolute links into relative ones for local viewing.
        with open(fileName, "w", encoding="utf-8") as outputFile:
            outputFile.write(fileContents.replace(mainURL, ""))

    def searchTarget(self, mainURL, fileContents, fileSize, keyword):
        """Download every target that follows ``keyword`` in ``fileContents``.

        Args:
            mainURL: URL prefix of the page, up to and including the last "/".
            fileContents: Text of the downloaded page.
            fileSize: Upper bound (exclusive) of the region that is searched.
            keyword: Marker to look for ("href" or "src=").

        Targets without a "/" and ``mailto`` links are skipped, as are
        absolute URLs that do not contain ``mainURL``.
        """
        for filePath in self._quotedValues(fileContents, fileSize, keyword):
            if "/" not in filePath or "mailto" in filePath:
                continue
            if "http" not in filePath:
                # Relative link: resolve against the page's URL prefix.
                self._download(mainURL + filePath, filePath)
            elif mainURL in filePath:
                # Same-site absolute link: store it where parseHTMLfile's
                # rewrite (mainURL removed) will make the page look for it.
                self._download(filePath, filePath.replace(mainURL, ""))

    @staticmethod
    def _quotedValues(fileContents, fileSize, keyword):
        """Yield the text between the first two '"' after each ``keyword``."""
        pos = fileContents.find(keyword, 0, fileSize)
        while pos >= 0:
            opening = fileContents.find('"', pos, fileSize)
            closing = -1
            if opening >= 0:
                closing = fileContents.find('"', opening + 1, fileSize)
            if closing < 0:
                # No complete quoted value remains. Stopping here also keeps
                # the scan from restarting at the top of the text forever.
                return
            yield fileContents[opening + 1:closing]
            pos = fileContents.find(keyword, closing + 1, fileSize)

    @staticmethod
    def _download(url, localPath):
        """Fetch ``url`` into ``localPath`` with curl, creating its folder."""
        if not localPath:
            return
        # NOTE(security): localPath comes from the downloaded page and is not
        # confined to the working directory ("/..." or "../..." targets are
        # written where they point) -- review before use on untrusted sites.
        folder = localPath.rpartition("/")[0]
        if folder:
            try:
                os.makedirs(folder, exist_ok=True)  # create the folder
            except OSError as err:
                # Best effort, like the rest of the clone: skip this resource.
                print("略過 %s：%s" % (url, err))
                return
        print("下載檔案 %s" % url)
        subprocess.run(["curl", url, "-o", localPath, "-J", "-s"], check=False)

    @staticmethod
    def _openLocally(fileName):
        """Open ``fileName`` with the platform's default application."""
        if hasattr(os, "startfile"):
            # Windows: same effect as the shell's "start", without a shell.
            os.startfile(fileName)
        else:
            webbrowser.open(Path(fileName).resolve().as_uri())


if __name__ == "__main__":
    app = QApplication([])
    win = DownloadWebData()
    win.show()
    app.exec_()