# Parse and Extract HTML
# pip install gazpacho
import gazpacho

# Extract HTML from URL
url = 'https://www.example.com/'
html = gazpacho.get(url)
print(html)

# Extract HTML with Headers
headers = {'User-Agent': 'Mozilla/5.0'}
html = gazpacho.get(url, headers=headers)
print(html)

# Parse HTML
parse = gazpacho.Soup(html)

# Find single tags
# mode='first' returns one Soup (or None) instead of the default
# "automatic" mode, which may return None, a Soup, or a list.
tag1 = parse.find('h1', mode='first')
tag2 = parse.find('span', mode='first')

# Find multiple tags
# gazpacho's Soup has no find_all(); mode='all' always returns a list.
tags1 = parse.find('p', mode='all')
tags2 = parse.find('a', mode='all')

# Find tags by class
# gazpacho has no CSS-selector syntax ('.class'); a tag name is required
# and the class is matched through attrs.
tag = parse.find('div', attrs={'class': 'class'})

# Find tags by Attribute
tag = parse.find("div", attrs={"class": "test"})

# Extract text from tags
# Guard against a missing tag so the script does not fail with
# AttributeError/IndexError on pages without <h1> or <p>.
heading = parse.find('h1', mode='first')
text = heading.text if heading else ''
paragraphs = parse.find('p', mode='all')
text = paragraphs[0].text if paragraphs else ''
二、二维码扫描仪
如果你拥有大量二维码图像,或者只是想扫描一张二维码图像,那么这个自动化脚本可以帮助你。该脚本使用 Qrtools 模块,让你能够以编程方式扫描二维码图像。
1 2 3 4 5 6 7 8 9
# Qrcode Scanner
# pip install qrtools
# NOTE(review): some qrtools distributions expose the class as `QR`;
# confirm that `Qr` is importable in the installed version.
from qrtools import Qr


def Scan_Qr(qr_img):
    """Decode the QR image at *qr_img*, print the payload and return it."""
    scanner = Qr()
    scanner.decode(qr_img)
    payload = scanner.data
    print(payload)
    return payload


print("Your Qr Code is: ", Scan_Qr("qr.png"))
# PDF Editor
# pip install PyPDF4
import PyPDF4


def _copy_all_pages(reader, writer):
    """Append every page of *reader* to *writer*, in order."""
    for index in range(reader.getNumPages()):
        writer.addPage(reader.getPage(index))


def _save(writer, out_path):
    """Write the pages collected in *writer* to *out_path*."""
    with open(out_path, 'wb') as f:
        writer.write(f)


# Parse the Text from PDF
def parse_text(pdf_file):
    """Print the extracted text of every page in *pdf_file*."""
    reader = PyPDF4.PdfFileReader(pdf_file)
    for index in range(reader.getNumPages()):
        print(reader.getPage(index).extractText())


# Remove Page from PDF
def remove_page(pdf_file, page_numbers):
    """Write 'rm.pdf' containing *pdf_file* without the given pages.

    *page_numbers* is an iterable of zero-based page indexes to drop.
    """
    # Read the file that was passed in (not a hard-coded 'source.pdf').
    reader = PyPDF4.PdfFileReader(pdf_file)
    writer = PyPDF4.PdfFileWriter()
    # Build the set once so each membership test is O(1).
    to_drop = set(page_numbers)
    for index in range(reader.getNumPages()):
        # Keep every page that is NOT listed for removal.
        if index not in to_drop:
            writer.addPage(reader.getPage(index))
    _save(writer, 'rm.pdf')


# Add Blank Page to PDF
def add_page(pdf_file, page_number):
    """Write 'add.pdf': *pdf_file* with a blank page at *page_number*.

    *page_number* is the zero-based index the blank page will occupy.
    """
    reader = PyPDF4.PdfFileReader(pdf_file)
    writer = PyPDF4.PdfFileWriter()
    # Copy the existing pages first; otherwise the output would hold
    # nothing but the blank page.
    _copy_all_pages(reader, writer)
    # With no width/height given, the blank page takes its size from an
    # existing page of the document.
    writer.insertBlankPage(index=page_number)
    _save(writer, 'add.pdf')


# Rotate Pages
def rotate_page(pdf_file):
    """Write 'rotate.pdf' with every page rotated 90 degrees clockwise."""
    reader = PyPDF4.PdfFileReader(pdf_file)
    writer = PyPDF4.PdfFileWriter()
    for index in range(reader.getNumPages()):
        page = reader.getPage(index)
        page.rotateClockwise(90)
        writer.addPage(page)
    _save(writer, 'rotate.pdf')


# Merge PDFs
def merge_pdfs(pdf_file1, pdf_file2):
    """Write 'merge.pdf': all pages of *pdf_file1*, then *pdf_file2*."""
    writer = PyPDF4.PdfFileWriter()
    for source in (pdf_file1, pdf_file2):
        _copy_all_pages(PyPDF4.PdfFileReader(source), writer)
    _save(writer, 'merge.pdf')
六、迷你 Stackoverflow
作为一名程序员,我知道我们每天都离不开 StackOverflow,但你不必再到 Google 上搜索了。现在,你可以一边继续处理项目,一边在 CMD 中直接获得解决方案。通过使用 Howdoi 模块,你可以在命令提示符或终端中获得 StackOverflow 的解决方案。下面是一些可以尝试的示例。
# Automate Stackoverflow
# pip install howdoi

# Get Answers in CMD
# The commands are written without a leading ">" prompt so they can be
# pasted directly; in a POSIX shell a leading ">" would be parsed as
# output redirection to a file named "howdoi".

# example 1
howdoi how do i install python3

# example 2
howdoi selenium Enter keys

# example 3
howdoi how to install modules

# example 4
howdoi Parse html with python

# example 5
howdoi int not iterable error

# example 6
howdoi how to parse pdf with python

# example 7
howdoi Sort list in python

# example 8
howdoi merge two lists in python

# example 9
howdoi get last element in list python

# example 10
howdoi fast way to sort list
# Get CPU/GPU Temperature
# pip install pythonnet
import time

import clr

clr.AddReference("OpenHardwareMonitorLib")
# pythonnet imports by .NET namespace, not by assembly file name; the
# Computer class lives in the OpenHardwareMonitor.Hardware namespace.
from OpenHardwareMonitor.Hardware import Computer

spec = Computer()
spec.GPUEnabled = True
spec.CPUEnabled = True
spec.Open()


def _print_temperatures(kind):
    """Print every temperature sensor value of hardware matching *kind*.

    *kind* is a substring looked up in the hardware identifier.
    NOTE(review): OpenHardwareMonitor identifiers are expected to look
    like "/intelcpu/0" or "/nvidiagpu/0" -- confirm on the target machine.
    """
    for hardware in spec.Hardware:
        if kind not in str(hardware.Identifier):
            continue
        # Sensor values are only refreshed when Update() is called.
        hardware.Update()
        for sensor in hardware.Sensors:
            if "/temperature" in str(sensor.Identifier):
                print(str(sensor.Value))


# Get CPU Temp
def Cpu_Temp(interval=1.0):
    """Print CPU temperature readings forever, every *interval* seconds."""
    while True:
        _print_temperatures("cpu")
        # Pause between polls so the loop does not spin at full speed.
        time.sleep(interval)


# Get GPU Temp
def Gpu_Temp(interval=1.0):
    """Print GPU temperature readings forever, every *interval* seconds."""
    while True:
        # Select GPU hardware explicitly rather than reading Hardware[0],
        # which is the same device the CPU function reads.
        _print_temperatures("gpu")
        time.sleep(interval)