# AutoCore - Automate Core Actions
# Author: Ash
# GitHub: https://github.com/AshAutomates/AutoCore
# Docs: https://autocore.readthedocs.io
# PyPI: https://pypi.org/project/autocore
# Supports: Windows, Linux
#---------------standard library imports---------------
import atexit
import configparser
import csv
import datetime
import email
import io
import json
import logging
import os
import platform
import re
import shutil
import sqlite3
import subprocess
import sys
import tempfile
import time
import traceback
import warnings
import wave
import xml.etree.ElementTree as et
from difflib import get_close_matches
from logging.handlers import RotatingFileHandler
from pathlib import Path
#---------------networking---------------
import requests
from bs4 import BeautifulSoup
#------------------------------------------------------------
# Image Processing
# Safe on all platforms including headless servers
# PIL.Image does not need a display : only PIL.ImageTk does
#------------------------------------------------------------
import numpy as np
from PIL import Image
#---------------Selenium------------------------
import undetected_chromedriver as uc
from selenium.common.exceptions import *
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
#--------------- Windows Only---------------
if platform.system() == "Windows":
import win32con
import win32gui
#------------------------------------------------------------
# Document Processing
# Safe on all platforms including headless servers
#------------------------------------------------------------
from PyPDF2 import PdfReader
from docx import Document
from openpyxl import Workbook, load_workbook
from pptx import Presentation
import ebooklib
from ebooklib import epub
from odf import teletype
from odf.opendocument import load as odf_load
from odf.text import P
import extract_msg
from striprtf.striprtf import rtf_to_text
import yaml
#------------------------------------------------------------
# GUI (requires display)
# Needs: X display server on Linux
# Works on: Linux desktop, Windows, macOS
# Fails on: Linux headless servers, Docker, CI/CD, SSH sessions
# Equivalent of running: export DISPLAY=:0 && xhost +local:
#------------------------------------------------------------
# Start from "display usable"; only Linux needs to be probed explicitly.
_GUI_AVAILABLE = True
if platform.system() == "Linux":
    # Fall back to display :0 when the environment does not name one
    os.environ.setdefault("DISPLAY", ":0")
    # A zero exit status from xhost is treated as "display reachable".
    # NOTE(review): 'xhost +local:' presumably relaxes X access control for
    # local clients as an import-time side effect : confirm this is intended.
    _GUI_AVAILABLE = os.system("xhost +local: > /dev/null 2>&1") == 0
if _GUI_AVAILABLE:
    import tkinter as tk
    from PIL import ImageTk
    import pyautogui
    # Enable PyAutoGUI fail-safe: move mouse to any screen corner to abort script
    pyautogui.FAILSAFE = True
else:
    # No display: keep the names defined so later references fail softly
    # behind the _GUI_AVAILABLE checks instead of raising NameError
    tk = None
    ImageTk = None
    pyautogui = None
#---------------------------------------------------------------------------
# Audio (requires sound system)
# Works on: Linux desktop, Windows
# Fails on: Linux headless servers with no audio drivers
# piper-tts is loaded lazily inside say() : we just check
# here whether the platform has a usable audio system at all
#
# Linux check 1 : aplay command is available (alsa-utils installed)
# Linux check 2 : at least one real audio playback device exists
# on Ubuntu Server with no audio hardware,
# 'aplay -l' returns non-zero with "no soundcards found"
# on Ubuntu Desktop / Linux Mint with real audio hardware,
# 'aplay -l' returns zero : so _AUDIO_AVAILABLE = True
#---------------------------------------------------------------------------
if platform.system() == "Linux":
    # Both probes always run; audio counts as available only if both succeed
    aplay_exists = os.system("aplay --version > /dev/null 2>&1") == 0  # alsa-utils installed?
    audio_device_exists = os.system("aplay -l > /dev/null 2>&1") == 0  # real audio hardware present?
    _AUDIO_AVAILABLE = aplay_exists and audio_device_exists
else:
    # Windows is always treated as audio-capable; every other platform is unsupported
    _AUDIO_AVAILABLE = platform.system() == "Windows"
#------------------------------------------------------------
# Clipboard (requires clipboard manager)
# Works on: Linux desktop, Windows, macOS
# Fails on: Linux headless servers with no clipboard manager
#------------------------------------------------------------
try:
import pyperclip
_CLIPBOARD_AVAILABLE = True
except Exception:
pyperclip = None
_CLIPBOARD_AVAILABLE = False
# Print the current working directory at import time so the user knows where
# files written with relative paths will be saved.
print(f"Current Working Directory: {os.getcwd()}")
# Raise the interpreter recursion limit to 1500 to avoid
# 'RuntimeError: maximum recursion depth exceeded'.
sys.setrecursionlimit(1500)
# Module-level variables that track logging state. All of them start "unset";
# presumably log_setup() (not visible in this part of the file) fills them in.
_log_file_handler: RotatingFileHandler | None = None  # rotating file handler, None until one is assigned
_original_stdout = None  # presumably the saved sys.stdout : confirm against log_setup()
_original_stderr = None  # presumably the saved sys.stderr : confirm against log_setup()
_log_folder: Path = Path("logs")  # log folder; relative, so it resolves against the working directory
_script_had_error = False # Track if unhandled exception occurred
def _preprocess_for_ocr(image):
    """
    Cleans up a screenshot so EasyOCR has an easier time reading it.

    Args:
        image: numpy array of the image in RGB format

    Returns:
        numpy array: Black and white image, twice the width and height of the input

    Note:
        - Pipeline order: grayscale -> 2x cubic upscale -> non-local-means
          denoise (h=10) -> Otsu binarisation -> morphological closing.
        - Callers must divide OCR coordinates by 2 to map them back to the
          original image because of the 2x upscale.
        - cv2 is imported lazily to keep 'from autocore import *' fast.
    """
    import cv2  # pip install opencv-python

    # Drop colour information, then enlarge so small glyphs get more pixels
    upscaled = cv2.resize(
        cv2.cvtColor(image, cv2.COLOR_RGB2GRAY),
        None,
        fx=2,
        fy=2,
        interpolation=cv2.INTER_CUBIC,
    )
    smoothed = cv2.fastNlMeansDenoising(upscaled, h=10)
    # threshold() returns (chosen_threshold, image); only the image is needed
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # NOTE(review): a 1x1 structuring element presumably makes this closing a
    # no-op : confirm whether a larger kernel was intended.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)
def _click_word_by_ocr(word_to_search, occurrences_to_click, button='left'):
    """
    Click on text found via OCR on screen.

    Args:
        word_to_search: Text to search for on screen (case-insensitive substring match)
        occurrences_to_click: 0 = click all occurrences, N = click Nth occurrence only
        button: 'left' for left-click (default), 'right' for right-click

    Returns:
        True if any click happened, False otherwise (including on any error)

    Output:
        - Prints how many occurrences were found and which one(s) were clicked.
        - Prints a message if the requested occurrence number does not exist.
        - Prints a message if the text was not found on screen.

    Note:
        - Used internally by click() and click_right().
        - Cross-platform compatible (Windows, Linux).
        - Occurrences are numbered top-to-bottom, then left-to-right.
        - The screenshot is passed to _preprocess_for_ocr() in RGB order, which
          is the channel order that helper converts from.
    """
    try:
        # Get shared OCR reader
        reader = _get_ocr_reader()
        # Capture screen using pyautogui (cross-platform)
        screenshot_img = pyautogui.screenshot()
        # Force 3-channel RGB so the grayscale conversion in the preprocessing
        # step receives the channel layout it expects
        image = np.array(screenshot_img.convert("RGB"))
        cleaned = _preprocess_for_ocr(image)
        # Extract text with bounding boxes using EasyOCR
        results = reader.readtext(cleaned)

        # Collect the centre point of every text box containing the search word
        needle = word_to_search.lower()
        matches = []
        for bbox, text, _confidence in results:
            if needle in text.lower():
                # Midpoint of opposite corners, halved again to undo the 2x upscale
                center_x = int((bbox[0][0] + bbox[2][0]) / 2 / 2)
                center_y = int((bbox[0][1] + bbox[2][1]) / 2 / 2)
                matches.append((center_x, center_y, text))
        # Sort matches by position (top-to-bottom, then left-to-right)
        matches.sort(key=lambda m: (m[1], m[0]))
        total_matches = len(matches)

        if total_matches == 0:
            print(f"'{word_to_search}' not found on screen")
            return False

        # Determine click action based on button parameter
        click_action = pyautogui.rightClick if button == 'right' else pyautogui.click
        action_name = "right-clicking" if button == 'right' else "clicking"
        if total_matches == 1:
            found = f"1 occurrence of '{word_to_search}' found"
        else:
            found = f"{total_matches} occurrences of '{word_to_search}' found"

        if occurrences_to_click == 0:
            # Click all occurrences
            target_note = "it" if total_matches == 1 else "all"
            print(f"{found}, {action_name} {target_note}")
            for x, y, _text in matches:
                click_action(x, y)
            return True

        if 0 < occurrences_to_click <= total_matches:
            # Click specific occurrence (1-indexed)
            target_note = "it" if total_matches == 1 else f"occurrence #{occurrences_to_click}"
            print(f"{found}, {action_name} {target_note}")
            x, y, _text = matches[occurrences_to_click - 1]
            click_action(x, y)
            return True

        # Requested occurrence doesn't exist; action_name[:-3] strips "ing"
        print(
            f"{found} on screen, but you tried to {action_name[:-3]} "
            f"occurrence #{occurrences_to_click} which doesn't exist")
        return False
    except Exception as e:
        print(f"Error in OCR click: {e}")
        return False
def _get_ocr_reader():
    """
    Returns a single shared OCR reader instance, creating it only once on first call
    and reusing it across all subsequent calls.

    Output:
        - Prints initialization message on first run.
        - Prints confirmation when OCR engine is ready.

    Note:
        - The reader is cached as an attribute on this function object.
        - easyocr is imported only when a reader actually has to be built, to
          avoid slow startup time when running 'from autocore import *'.
        - Tries GPU first, falls back to CPU if GPU initialisation raises.
        - Shared across all OCR functions (click, read, etc.).
        - First run downloads OCR models (~100MB).

    Returns:
        easyocr.Reader: Shared OCR reader instance
    """
    if not hasattr(_get_ocr_reader, 'reader'):
        # Third-party imports - OCR
        import easyocr  # pip install "numpy<2" easyocr
        print("Initializing OCR engine (first run downloads models, please wait)...")
        try:
            # Try GPU first
            _get_ocr_reader.reader = easyocr.Reader(['en'], gpu=True, verbose=False)
        except Exception:
            # Fall back to CPU if GPU fails. Exception (not a bare except) so
            # Ctrl+C / SystemExit during the long model load still abort.
            _get_ocr_reader.reader = easyocr.Reader(['en'], gpu=False, verbose=False)
        print("OCR engine ready.")
    return _get_ocr_reader.reader
def _get_web_element(driver_obj, selector_type, selector):
    """
    Looks up one web element through Selenium and hands it back.

    Args:
        driver_obj: Selenium WebDriver instance
        selector_type: One of 'id', 'xpath', 'class', 'name', 'css', 'tag', 'text', 'partial'
        selector: Selector value/string

    Example:
    ::
        element = _get_web_element(driver, 'id', 'submit-button')
        element = _get_web_element(driver, 'xpath', '//button[@type="submit"]')
        element = _get_web_element(driver, 'class', 'btn-primary')

    Note:
        - 'text' maps to link text and 'partial' to partial link text; both are case-sensitive.
        - An unknown selector_type yields None without touching the driver.
        - NoSuchElementException is turned into a None return value.

    Returns:
        WebElement if found, None if not found
    """
    # selector_type alias -> attribute name on selenium's By
    locator_names = (
        ("id", "ID"),
        ("xpath", "XPATH"),
        ("class", "CLASS_NAME"),
        ("name", "NAME"),
        ("css", "CSS_SELECTOR"),
        ("tag", "TAG_NAME"),
        ("text", "LINK_TEXT"),  # remember, text search is case-sensitive
        ("partial", "PARTIAL_LINK_TEXT"),  # remember, partial text search is case-sensitive
    )
    for alias, by_attr in locator_names:
        if selector_type == alias:
            try:
                return driver_obj.find_element(getattr(By, by_attr), selector)
            except NoSuchElementException:
                return None
    return None
class _CustomRotatingFileNameHandler(RotatingFileHandler):
    """
    Custom log rotation handler that extends RotatingFileHandler with two behaviours:
    1. Renames rotated files from log.txt.1 format to log_part_1.txt format
       to keep the .txt extension visible and readable in file explorers.
    2. Automatically deletes the oldest log files when the total size of the
       log folder exceeds max_total_mb (100MB by default), keeping disk usage
       under control.
    Used internally by log_setup().
    """

    # Folder size ceiling in megabytes; override on a subclass or instance to tune
    max_total_mb = 100

    def rotation_filename(self, default_name):
        """
        Converts default rotation filename format to a cleaner format.

        Only a trailing '.txt.<number>' is rewritten, so a directory that
        happens to contain the same text is left untouched. Names without
        that suffix are returned unchanged.

        Example:
            log_script.txt.1 --> log_script_part_1.txt
        """
        return re.sub(r'\.txt\.(\d+)$', r'_part_\1.txt', default_name)

    def doRollover(self):
        """
        Performs log rotation and triggers cleanup of old logs if the folder is over its size limit.
        """
        # Do the normal rotation first
        super().doRollover()
        # Now cleanup old logs if the folder exceeds the limit
        self._cleanup_old_logs()

    def _cleanup_old_logs(self):
        """
        Deletes the oldest log files (by modification time) when the total size of
        'log_*.txt*' files in the log folder exceeds max_total_mb, removing files
        one by one until the folder is back under the limit.

        The folder inspected is the one containing this handler's own log file;
        'logs' relative to the working directory is used only when the handler
        has no file name. The file currently being written is never deleted.

        Output:
            - Prints name and remaining folder size after each deleted file.
            - Prints warning if a file could not be deleted.
            - Prints warning if cleanup itself fails.
        """
        try:
            active_name = getattr(self, "baseFilename", None)
            active_path = Path(active_name) if active_name else None
            log_folder = active_path.parent if active_path else Path("logs")
            if not log_folder.exists():
                return

            log_files = []
            total_size = 0
            for log_file in log_folder.glob("log_*.txt*"):
                try:
                    # One stat() call per file; a file removed by someone else
                    # between glob() and stat() is simply skipped
                    stats = log_file.stat()
                except OSError:
                    continue
                log_files.append((log_file, stats.st_size, stats.st_mtime))
                total_size += stats.st_size

            # Convert to MB
            total_size_mb = total_size / (1024 * 1024)
            limit_mb = self.max_total_mb
            if total_size_mb <= limit_mb:
                return

            # Sort by modification time (oldest first) and delete until under the limit
            log_files.sort(key=lambda entry: entry[2])
            for log_file, size, _ in log_files:
                if total_size_mb <= limit_mb:
                    break
                if log_file == active_path:
                    # Unlinking the file the handler still has open would lose new records
                    continue
                try:
                    log_file.unlink()  # deletes the file from the filesystem.
                    total_size_mb -= size / (1024 * 1024)
                    print(f"Deleted old log file: {log_file.name} (folder size: {total_size_mb:.1f}MB)")
                except Exception as e:
                    print(f"Warning: Could not delete {log_file.name}: {e}")
        except Exception as e:
            print(f"Warning: Log cleanup failed: {e}")
class _LogCapture:
    """
    Intercepts stdout and stderr to mirror all print statements to both
    the terminal and the log file simultaneously.
    Used internally by log_setup() to redirect sys.stdout and sys.stderr.

    Note:
        - Uses a guard flag (_logging) to prevent infinite recursion that would
          occur if the logger itself triggers a write() call.
        - flush() is implemented to satisfy the stream interface expected by
          sys.stdout and sys.stderr.
        - Any other stream attribute (isatty, encoding, fileno, ...) is
          forwarded to the original stream, so code that probes sys.stdout
          keeps working after redirection.
    """

    def __init__(self, original_stream, logger, level):
        """
        Args:
            original_stream: The original sys.stdout or sys.stderr stream
            logger: The logging.Logger instance to write to
            level: Logging level (logging.INFO for stdout, logging.ERROR for stderr)
        """
        self.original_stream = original_stream
        self.logger = logger
        self.level = level
        self._logging = False  # guard flag to prevent infinite recursion

    def __getattr__(self, name):
        """
        Forwards attributes this wrapper does not define to the original stream.
        Only called when normal attribute lookup has already failed.
        """
        # Without this guard a half-initialised instance (no original_stream
        # yet) would recurse forever while looking up original_stream itself
        if name == "original_stream":
            raise AttributeError(name)
        return getattr(self.original_stream, name)

    def write(self, message):
        """
        Writes message to both terminal and log file.
        Whitespace-only messages reach the terminal but are not logged;
        recursive calls made while logging are not logged again.

        Returns:
            int: Number of characters in message, as text streams report from write()
        """
        self.original_stream.write(message)
        self.original_stream.flush()
        # Write to log file (strip to avoid double newlines)
        # guard flag prevents recursive calls when logger itself triggers write()
        if message.strip() and not self._logging:
            self._logging = True  # block any further recursive calls
            try:
                self.logger.log(self.level, message.rstrip())
            finally:
                self._logging = False  # always release guard even if exception occurs
        return len(message)

    def flush(self):
        """
        Flushes the original stream to satisfy the stream interface.
        """
        self.original_stream.flush()
# [docs]
def _detect_chrome_version():
    """Return the installed Chrome version as a dotted-number string, or None if it cannot be determined."""
    system = platform.system()
    if system == "Windows":
        command = ['reg', 'query', 'HKEY_CURRENT_USER\\Software\\Google\\Chrome\\BLBeacon', '/v', 'version']
    elif system == "Linux":
        command = ['google-chrome', '--version']
    else:
        return None
    try:
        result = subprocess.run(command, capture_output=True, text=True)
        # The version is the last whitespace-separated token of the output
        candidate = result.stdout.strip().split()[-1]
    except Exception:
        # Missing executable, empty output, etc. : treat as "unknown"
        return None
    # Reject anything that is not purely digits and dots so it can safely be
    # embedded in the user-agent and parsed with int() later
    return candidate if re.fullmatch(r'\d+(?:\.\d+)*', candidate) else None


def browser(url, headless=False, timeout=30, cookie_path=None):
    """
    Initialize and return a browser instance for web automation.

    Args:
        url: Target URL to navigate to ('https://' is prepended if no protocol is given)
        headless: Run browser in headless mode (default: False)
        timeout: Maximum seconds to wait for elements to appear (default: 30)
        cookie_path: Path to cookies JSON file (optional)
        ::
            - Cookies MUST be in JSON format
            - Export from Chrome using "Cookie-Editor" extension
            - Cookie domain must match the target URL

    Returns:
        WebDriver: Browser instance, or None if the driver could not be started
        or the cookies could not be loaded. The resolved download folder is
        stored on the instance as 'download_dir'.

    Example:
    ::
        # Basic usage
        driver = browser('https://google.com')
        click(driver, 'id', 'search-button')
        # Slow-loading site
        driver = browser('https://slow-site.gov', timeout=90)
        click(driver, 'id', 'submit-btn') # Waits up to 90s
        # Fast site testing
        driver = browser('https://fast-site.com', timeout=5)
        click(driver, 'id', 'login-btn') # Fails fast in 5s
        # With cookies
        driver = browser('https://site.com', cookie_path='cookies.json')
        # Headless mode
        driver = browser('https://google.com', headless=True)
        # Trigger a download and wait for it
        driver = browser('https://example.com')
        click(driver, 'id', 'download-button')
        wait_download(download_dir=driver.download_dir)

    Note:
        Uses undetected-chromedriver (uc) to bypass bot detection.
        Requires Google Chrome to be installed.
        If cookie loading fails, the browser that was already started is closed
        before None is returned.
        Windows:
            winget install Google.Chrome
        Linux (Ubuntu/Debian/Mint):
            wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
            sudo dpkg -i google-chrome-stable_current_amd64.deb
            sudo apt-get install -f -y
        Linux (RHEL/CentOS/Fedora):
            wget https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
            sudo rpm -i google-chrome-stable_current_x86_64.rpm
    """
    # Auto-add https:// if protocol is missing
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
        print(f"Protocol not specified. Using: {url}")
    # Initialize options for Chrome
    options = uc.ChromeOptions()
    # --------------- HEADLESS MODE ---------------
    if headless:
        options.add_argument("--headless=new")
        options.add_argument("--window-size=2560,1440")
        options.add_argument("--disable-gpu")  # disable GPU (prevents SwiftShader WebGL signal)
        options.add_argument("--enable-features=OverlayScrollbar")  # mimics real Chrome scrollbar behavior
    # --------------- CHROME VERSION ---------------
    # Fetch installed Chrome version dynamically to keep user-agent current.
    # A matching Chrome version in user-agent makes the browser look more like a real user.
    chrome_version = _detect_chrome_version()
    if chrome_version:
        user_agent = f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{chrome_version} Safari/537.36"
    else:
        # If Chrome version detection fails, use a generic user-agent without version number
        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari/537.36"
    # Set a user-agent string to make the automated browser look like a regular Chrome browser
    options.add_argument(f"--user-agent={user_agent}")
    # Additional options to enhance realism and disable Selenium detection
    options.add_argument('--start-maximized')  # Start browser maximized
    options.add_argument("--safebrowsing-disable-download-protection")  # allow unverified downloads
    # Both features go into ONE switch: a repeated --disable-features switch
    # presumably replaces the earlier one instead of adding to it
    options.add_argument("--disable-features=InsecureDownloadWarnings,DownloadBubble")  # no insecure-download warnings, no download bubble UI
    options.add_argument("--no-sandbox")  # disable sandbox restrictions
    options.add_argument("--lang=en-US")  # set language to avoid empty navigator.languages
    options.add_argument("--disable-translate")  # disable Google Translate popup
    options.add_argument("--no-first-run")  # skip first-run welcome popups
    options.add_argument("--no-default-browser-check")  # skip default browser prompt
    # --------------- DOWNLOAD DIRECTORY ---------------
    # Resolve download directory using same priority as wait_download()
    # Pre-create it before Chrome launches so Chrome finds and uses it
    if os.getenv('DOWNLOAD_DIR'):
        download_dir = os.getenv('DOWNLOAD_DIR')
    elif os.path.exists('/.dockerenv'):
        download_dir = '/downloads'
    else:
        download_dir = str(Path.home() / 'Downloads')
    os.makedirs(download_dir, exist_ok=True)
    # --------------- CHROME PREFERENCES ---------------
    # Set preferences to avoid unnecessary pop-ups and block notifications
    prefs = {
        "download.default_directory": download_dir,  # explicitly set download folder
        "profile.default_content_setting_values.notifications": 2,  # block browser notification popups
        'credentials_enable_service': False,  # disable save password popup
        'profile': {'password_manager_enabled': False},  # disable password manager completely
        'profile.password_manager_leak_detection': False,  # breach popup
        "translate.enabled": False,  # disable translate bar
        "autofill.profile_enabled": False,  # disable autofill suggestions
        "autofill.credit_card_enabled": False,  # disable credit card autofill
        "safebrowsing.enabled": False,  # disable safe browsing to allow downloads
        "download.prompt_for_download": False,  # no download confirmation prompt
        "download.directory_upgrade": True,  # allow download directory changes
        "plugins.always_open_pdf_externally": True,  # download PDFs instead of opening in browser
        "safebrowsing_for_trusted_sources_enabled": False,  # disable safe browsing for trusted sources
        "safebrowsing.disable_download_protection": True,  # disable download protection completely
    }
    options.add_experimental_option("prefs", prefs)
    # --------------- INITIALIZE DRIVER ---------------
    try:
        # Extract major version from detected Chrome version to prevent ChromeDriver mismatch
        major_version = int(chrome_version.split('.')[0]) if chrome_version else None
        # Pass major version to force matching ChromeDriver download
        driver_instance = uc.Chrome(options=options, version_main=major_version)
    except Exception as e:
        print(f"Error initializing Chrome Driver: {e}")
        return None
    # --------------- BOT DETECTION PATCHES ---------------
    # Inject JS to patch remaining bot detection signals
    driver_instance.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
            // Fix navigator.webdriver at prototype level
            Object.defineProperty(navigator, 'webdriver', {
                get: () => false,
                configurable: true,
                enumerable: true
            });
            try { delete navigator.__proto__.webdriver; } catch(e) {}
            // Fix plugins to return proper PluginArray-like object
            const mockPlugins = ['Chrome PDF Plugin', 'Chrome PDF Viewer', 'Native Client'].map(name => {
                const plugin = Object.create(Plugin.prototype);
                Object.defineProperty(plugin, 'name', {get: () => name});
                Object.defineProperty(plugin, 'filename', {get: () => name.toLowerCase().replace(/ /g, '-') + '.dll'});
                Object.defineProperty(plugin, 'description', {get: () => name});
                Object.defineProperty(plugin, 'length', {get: () => 0});
                return plugin;
            });
            Object.defineProperty(navigator, 'plugins', {
                get: () => {
                    const arr = Object.create(PluginArray.prototype);
                    mockPlugins.forEach((p, i) => arr[i] = p);
                    Object.defineProperty(arr, 'length', {get: () => mockPlugins.length});
                    arr.item = i => mockPlugins[i];
                    arr.namedItem = name => mockPlugins.find(p => p.name === name) || null;
                    arr.refresh = () => {};
                    return arr;
                }
            });
            // Fix permissions to return 'prompt' instead of 'denied'
            const originalQuery = window.navigator.permissions.query.bind(window.navigator.permissions);
            window.navigator.permissions.query = (parameters) => {
                const permissionsToSpoof = ['notifications', 'push', 'midi', 'camera', 'microphone', 'geolocation'];
                if (permissionsToSpoof.includes(parameters.name)) {
                    return Promise.resolve({
                        state: 'prompt',
                        permission: 'prompt',
                        status: 'prompt',
                        onchange: null
                    });
                }
                return originalQuery(parameters);
            };
            // Also patch inside iframes
            if (window.top !== window.self) {
                window.navigator.permissions.query = (parameters) => {
                    return Promise.resolve({state: 'prompt', permission: 'prompt', onchange: null});
                };
            }
            // Fix navigator.languages
            Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
            // Fix navigator.mimeTypes
            Object.defineProperty(navigator, 'mimeTypes', {get: () => [
                {type: 'application/pdf'},
                {type: 'application/x-nacl'},
            ]});
            // Fix window.chrome
            window.chrome = {
                webstore: {
                    onInstallStageChanged: {},
                    onDownloadProgress: {},
                    install: function() {},
                    constructor: function() {}
                },
                runtime: {
                    onConnect: null,
                    onMessage: null,
                    connect: function() {},
                    sendMessage: function() {},
                    id: undefined
                },
                loadTimes: function() {},
                csi: function() {},
                app: {
                    isInstalled: false,
                    InstallState: {DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed'},
                    RunningState: {CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running'}
                }
            };
            // Fix WebGL renderer
            const getParameter = WebGLRenderingContext.prototype.getParameter;
            WebGLRenderingContext.prototype.getParameter = function(parameter) {
                if (parameter === 37445) return 'Intel Inc.';
                if (parameter === 37446) return 'Intel Iris OpenGL Engine';
                return getParameter.call(this, parameter);
            };
        """
    })
    # Set an implicit wait for elements to be found
    driver_instance.implicitly_wait(timeout)
    # Load a blank page to initialize the driver properly before cookie injection
    driver_instance.get("about:blank")
    # --------------- COOKIES ---------------
    if cookie_path and not os.path.exists(cookie_path):
        # Tell the user instead of silently browsing without the session they asked for
        print(f"Cookie file not found: {cookie_path}. Continuing without cookies.")
    # Load cookies from the specified path, if available.
    if cookie_path and os.path.exists(cookie_path):
        # Load target page to inject cookie
        driver_instance.get(url)
        try:
            with open(cookie_path, "r", encoding="utf-8") as f:
                cookies = json.load(f)
            for cookie in cookies:
                # Remove fields added by browser extensions that Selenium does not support
                for unsupported_field in ("sameSite", "hostOnly", "session", "storeId"):
                    cookie.pop(unsupported_field, None)
                # Selenium rejects cookies with a leading dot in the domain, while browsers accept them, so the dot must be removed for compatibility.
                if cookie.get("domain", "").startswith("."):
                    cookie["domain"] = cookie["domain"][1:]
                driver_instance.add_cookie(cookie)
            # Reload page so cookies show effect
            driver_instance.get(url)
        except Exception as e:
            print(f"Cookie loading failed: {e}")
            # The caller gets None and cannot close the browser itself, so
            # shut it down here rather than leaving an orphaned Chrome process
            try:
                driver_instance.quit()
            except Exception:
                pass  # best effort: the failure has already been reported above
            return None
    else:
        # No cookies provided so single page load is enough
        driver_instance.get(url)
    # Suppress undetected_chromedriver's __del__ cleanup error on exit
    driver_instance.__class__.__del__ = lambda self: None
    # Store resolved download directory so caller can pass it to wait_download()
    driver_instance.download_dir = download_dir
    # Return the driver instance - user can name it anything!
    return driver_instance
# [docs]
def click(*where):
    """
    Performs left-click based on different input types.

    Modes:
        1. Image matching: Click on visual element
        2. OCR text matching: Click on text found on screen (with nth occurrence support)
        3. Coordinates: Click at specific x, y position
        4. Color matching: Click on specific color in region
        5. Selenium web element: Click on element in browser

    Args:
        *where: Variable arguments depending on click mode. A single string
            containing a '.' is treated as an image file name; any other
            single string is searched for on screen via OCR.

    Returns:
        True if successful, False otherwise (a message is printed on failure)

    Example:
    ::
        # Image matching
        click('button.png')
        # OCR text matching
        click('Submit') # Click first occurrence
        click('Submit', 2) # Click 2nd occurrence
        click('Login', 0) # Click all occurrences
        # Coordinates
        click(100, 200)
        # Color matching in region
        click(x1, y1, x2, y2, r, g, b) # Find and click color
        click(x1, y1, x2, y2, r, g, b, tolerance) # With tolerance
        # Selenium (pass driver object first)
        click(driver, 'id', 'submit-button')
        click(driver, 'xpath', '//button[@id="submit"]')
        click(driver, 'class', 'btn-primary')
        click(driver, 'name', 'username')
        click(driver, 'css', 'button.submit')
        click(driver, 'tag', 'button')
        click(driver, 'text', 'Click Here')
        click(driver, 'partial', 'Click')

    Note:
        - Selenium mode works without a display and without OpenCV; cv2 is
          imported only for color matching.
    """
    # --------------- SELENIUM ---------------
    # Anything exposing find_element is treated as a WebDriver
    if len(where) > 0 and hasattr(where[0], 'find_element'):
        driver_obj = where[0]
        if len(where) < 3:
            print("Error: Selenium click requires 3 arguments: click(driver, selector_type, selector)")
            return False
        selector_type = where[1]
        selector = where[2]
        try:
            element = _get_web_element(driver_obj, selector_type, selector)
            if element:
                driver_obj.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
                try:
                    element.click()
                except ElementClickInterceptedException:
                    # Fall back to JavaScript click when element is blocked by an overlay (e.g. ads)
                    driver_obj.execute_script("arguments[0].click();", element)
                return True
            print(f"Element not found: {selector_type} - {selector}")
            return False
        except NoSuchElementException:
            print(f"Element of {selector_type} - {selector} not found.")
            return False
        except Exception as e:
            print(f"Error clicking element: {e}")
            return False
    # --------------- PYAUTOGUI ---------------
    if not _GUI_AVAILABLE:
        print("Error: click() requires a display.")
        return False
    # --------------- IMAGE MATCHING / OCR TEXT ---------------
    if len(where) == 1:
        target = where[0]
        if not isinstance(target, str):
            # A lone non-string cannot be an image name or a word to search for
            print("Error: Invalid arguments for click()")
            return False
        if '.' in target:
            try:
                pyautogui.click(target)
                return True
            except Exception as e:
                # e.g. the image is not on screen or the file cannot be read
                print(f"Error clicking image '{target}': {e}")
                return False
        return _click_word_by_ocr(target, 1, button='left')
    # --------------- COORDINATES / OCR WITH OCCURRENCE ---------------
    if len(where) == 2:
        if isinstance(where[0], int) and isinstance(where[1], int):
            pyautogui.click(where[0], where[1])
            return True
        if isinstance(where[0], str) and isinstance(where[1], int):
            return _click_word_by_ocr(where[0], where[1], button='left')
        print("Error: Invalid arguments for click()")
        return False
    # --------------- COLOR MATCHING IN REGION ---------------
    if len(where) in (7, 8):
        x_from, y_from, x_to, y_to, r, g, b = where[:7]
        tolerance = where[7] if len(where) == 8 else 0
        try:
            import cv2  # lazy: only this mode needs OpenCV
            screenshot_img = pyautogui.screenshot(region=(x_from, y_from, x_to - x_from, y_to - y_from))
            screenshot_img = np.array(screenshot_img)
            # OpenCV works in BGR, so the bounds below are given in b, g, r order
            screenshot_img = cv2.cvtColor(screenshot_img, cv2.COLOR_RGB2BGR)
            lower = np.array([b - tolerance, g - tolerance, r - tolerance])
            upper = np.array([b + tolerance, g + tolerance, r + tolerance])
            mask = cv2.inRange(screenshot_img, lower, upper)
            points = cv2.findNonZero(mask)
            if points is not None:
                # First matching pixel, translated from region to screen coordinates
                click_x, click_y = points[0][0]
                pyautogui.click(x_from + click_x, y_from + click_y)
                print(f'Pixel found and clicked at ({x_from + click_x}, {y_from + click_y}).')
                return True
            print('Pixel not found.')
            return False
        except Exception as e:
            print(f"Error during color search: {e}")
            return False
    # ---------------Invalid Arguments---------------
    print("Error: Invalid arguments for click()")
    return False
# [docs]
def click_right(*where):
    """
    Performs right-click (context menu) based on different input types.

    Modes:
        1. Image matching: Right-click on visual element
        2. OCR text matching: Right-click on text found on screen (with nth occurrence support)
        3. Coordinates: Right-click at specific x, y position
        4. Color matching: Right-click on specific color in region
        5. Selenium web element: Right-click on element in browser

    Args:
        *where: Variable arguments depending on click mode

    Returns:
        True if successful, False otherwise

    Note:
        - A single string argument containing a '.' is treated as an image
          file name; any other single string is handed to the OCR helper.
        - A single argument that is not a string is rejected as invalid.
        - OpenCV (cv2) is imported only for color matching, so the other
          modes do not depend on it.

    Example:
        ::

            # Image matching
            click_right('button.png')
            # OCR text matching
            click_right('Submit')  # Right-click first occurrence
            click_right('Submit', 2)  # Right-click 2nd occurrence
            click_right('Login', 0)  # Right-click all occurrences
            # Coordinates
            click_right(100, 200)
            # Color matching in region
            click_right(x1, y1, x2, y2, r, g, b)  # Find and right-click color
            click_right(x1, y1, x2, y2, r, g, b, tolerance)  # With tolerance
            # Selenium (pass driver object first)
            click_right(driver, 'id', 'submit-button')
            click_right(driver, 'xpath', '//button[@id="submit"]')
            click_right(driver, 'class', 'btn-primary')
            click_right(driver, 'name', 'username')
            click_right(driver, 'css', 'button.submit')
            click_right(driver, 'tag', 'button')
            click_right(driver, 'text', 'Click Here')
            click_right(driver, 'partial', 'Click')
    """
    # --------------- SELENIUM ---------------
    # A driver is recognised by duck typing: anything exposing find_element.
    if len(where) > 0 and hasattr(where[0], 'find_element'):
        driver_obj = where[0]
        if len(where) < 3:
            print("Error: Selenium click_right requires 3 arguments: click_right(driver, selector_type, selector)")
            return False
        selector_type = where[1]
        selector = where[2]
        try:
            element = _get_web_element(driver_obj, selector_type, selector)
            if element:
                # Bring the element into view before issuing the context click
                driver_obj.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
                ActionChains(driver_obj).context_click(element).perform()
                return True
            else:
                print(f"Element not found: {selector_type} - {selector}")
                return False
        except NoSuchElementException:
            print(f"Element of {selector_type} - {selector} not found.")
            return False
        except Exception as e:
            print(f"Error right-clicking element: {e}")
            return False
    # --------------- PYAUTOGUI ---------------
    # Every remaining mode drives the real mouse and therefore needs a display.
    if not _GUI_AVAILABLE:
        print("Error: click_right() requires a display.")
        return False
    # --------------- IMAGE MATCHING / OCR TEXT ---------------
    if len(where) == 1:
        # Guard first: the '.' membership test below raises TypeError on non-strings.
        if not isinstance(where[0], str):
            print("Error: Invalid arguments for click_right()")
            return False
        if '.' in where[0]:
            pyautogui.rightClick(where[0])
            return True
        return _click_word_by_ocr(where[0], 1, button='right')
    # --------------- COORDINATES / OCR WITH OCCURRENCE ---------------
    elif len(where) == 2:
        if isinstance(where[0], int) and isinstance(where[1], int):
            pyautogui.rightClick(where[0], where[1])
            return True
        elif isinstance(where[0], str) and isinstance(where[1], int):
            return _click_word_by_ocr(where[0], where[1], button='right')
        else:
            print("Error: Invalid arguments for click_right()")
            return False
    # --------------- COLOR MATCHING IN REGION ---------------
    elif len(where) in [7, 8]:
        # Imported here so that only this mode requires OpenCV.
        import cv2
        x_from, y_from, x_to, y_to, r, g, b = where[:7]
        tolerance = where[7] if len(where) == 8 else 0
        try:
            screenshot_img = pyautogui.screenshot(region=(x_from, y_from, x_to - x_from, y_to - y_from))
            screenshot_img = np.array(screenshot_img)
            # The screenshot is converted RGB -> BGR, so the bounds are given in B, G, R order.
            screenshot_img = cv2.cvtColor(screenshot_img, cv2.COLOR_RGB2BGR)
            lower = np.array([b - tolerance, g - tolerance, r - tolerance])
            upper = np.array([b + tolerance, g + tolerance, r + tolerance])
            mask = cv2.inRange(screenshot_img, lower, upper)
            points = cv2.findNonZero(mask)
            if points is not None:
                # Use the first matching pixel; offsets are relative to the captured region.
                click_x, click_y = points[0][0]
                pyautogui.rightClick(x_from + click_x, y_from + click_y)
                print(f'Pixel found and right-clicked at ({x_from + click_x}, {y_from + click_y}).')
                return True
            print('Pixel not found.')
            return False
        except Exception as e:
            print(f"Error during color search: {e}")
            return False
    # ---------------Invalid Arguments---------------
    else:
        print("Error: Invalid arguments for click_right()")
        return False
[docs]
def copy(*where):
    """
    Retrieves text from the screen, the clipboard, a web page or a web element.

    Modes:
        1. Active window: Select everything in the current window and copy it
        2. Clipboard: Return the current clipboard content
        3. Screen coordinates: Click at a position, then select all and copy
        4. Selenium webpage: Select all and copy the entire page
        5. Selenium element: Read an element's text or one of its attributes

    Args:
        *where: Variable arguments depending on copy mode

    Returns:
        str: Copied text or '' if nothing was copied

    Example:
        ::

            # Active window - Ctrl+A, Ctrl+C from active window
            copy()
            # Clipboard - get current clipboard content
            copy('clipboard')
            # Screen coordinates - click at (500, 300) and copy
            copy(500, 300)
            # Selenium webpage - copy all webpage content
            copy(driver)
            # Selenium element - copy text
            copy(driver, 'id', 'username-display')
            copy(driver, 'xpath', '//div[@class="name"]')
            copy(driver, 'class', 'user-info')
            copy(driver, 'name', 'description')
            copy(driver, 'css', 'p.content')
            copy(driver, 'tag', 'h1')
            copy(driver, 'text', 'Welcome')
            copy(driver, 'partial', 'Hello')
            # Selenium element - copy attribute
            copy(driver, 'id', 'download-link', 'href')  # Get link URL
            copy(driver, 'class', 'product-img', 'src')  # Get image source
            copy(driver, 'id', 'email-field', 'value')  # Get input value
            copy(driver, 'xpath', '//a[@id="link"]', 'title')  # Get title attribute
    """

    def _select_all_and_grab():
        # Ctrl+A, then Ctrl+C in the focused window, pausing 1 second after
        # each shortcut before reading the clipboard back.
        pyautogui.hotkey('ctrl', 'a')
        time.sleep(1)
        pyautogui.hotkey('ctrl', 'c')
        time.sleep(1)
        return pyperclip.paste().strip()

    arg_count = len(where)
    copied = None

    if arg_count == 0:
        # ----- Active window: needs a display and a clipboard -----
        if not _GUI_AVAILABLE:
            print("Error: copy() requires a display.")
            return ''
        if not _CLIPBOARD_AVAILABLE:
            print("Error: copy() requires a clipboard manager.")
            return ''
        pyperclip.copy('')
        copied = _select_all_and_grab()

    elif arg_count == 1:
        target = where[0]
        if target == 'clipboard':
            # ----- Clipboard: needs a clipboard only -----
            if not _CLIPBOARD_AVAILABLE:
                print("Error: copy('clipboard') requires a clipboard manager.")
                return ''
            copied = pyperclip.paste().strip()
        elif hasattr(target, 'find_element'):
            # ----- Selenium webpage: keystrokes go to the browser, text comes back via clipboard -----
            if not _CLIPBOARD_AVAILABLE:
                print("Error: copy(driver) requires a clipboard manager.")
                return ''
            pyperclip.copy('')
            try:
                chain = ActionChains(target)
                chain.key_down(Keys.CONTROL).send_keys('a').key_up(Keys.CONTROL).perform()
                time.sleep(1)
                chain.key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()
                time.sleep(1)
                copied = pyperclip.paste().strip()
                # Leave neither the clipboard nor the page selection behind
                pyperclip.copy('')
                target.execute_script("window.getSelection().removeAllRanges();")
            except Exception as e:
                print(f"Error copying webpage content: {e}")
        else:
            print(f"Invalid argument: {target}")

    elif arg_count == 2:
        first, second = where
        if isinstance(first, int) and isinstance(second, int):
            # ----- Screen coordinates: needs a display and a clipboard -----
            if not _GUI_AVAILABLE:
                print("Error: copy(x, y) requires a display.")
                return ''
            if not _CLIPBOARD_AVAILABLE:
                print("Error: copy(x, y) requires a clipboard manager.")
                return ''
            pyperclip.copy('')
            pyautogui.click(first, second)
            time.sleep(1)
            copied = _select_all_and_grab()
        elif hasattr(first, 'find_element'):
            print("Error: Selenium copy requires 3 arguments: copy(driver, selector_type, selector)")
            print("Example: copy(driver, 'id', 'username')")
        else:
            print("Invalid arguments for copy()")

    elif arg_count in (3, 4):
        # ----- Selenium element: read through the element API, no clipboard involved -----
        if not hasattr(where[0], 'find_element'):
            print("Error: For Selenium mode, first argument must be driver object")
        else:
            browser_obj, selector_type, selector = where[0], where[1], where[2]
            attribute = where[3] if arg_count == 4 else None
            try:
                found = _get_web_element(browser_obj, selector_type, selector)
                if not found:
                    print(f"Element not found: {selector_type} - {selector}")
                elif attribute:
                    raw_value = found.get_attribute(attribute)
                    copied = raw_value.strip() if raw_value else ''
                else:
                    copied = found.text.strip()
            except Exception as e:
                print(f"Error copying from element: {e}")

    else:
        print("Invalid arguments provided for copy()")

    # A missing or empty result is always reported and normalised to ''
    if copied is None or copied == '':
        print("No content returned from copy()")
        return ''
    return copied
[docs]
def csv_to_xlsx(csv_file=None, delete_csv=True):
    """
    Converts a CSV file to XLSX format.

    The XLSX file is written next to the CSV, using the same name with the
    suffix replaced by ``.xlsx``. Every CSV field is appended as read by the
    csv module.

    Args:
        csv_file: Path to CSV file or None to auto-detect single CSV in current directory
        delete_csv: If True, deletes original CSV after conversion (default: True)

    Returns:
        str: Path of created XLSX file or None if error

    Output:
        - Prints the detected CSV filename when auto-detected.
        - Prints conversion result showing source and destination filenames.
        - Prints confirmation when original CSV is deleted.

    Note:
        The CSV is read as 'utf-8-sig', so a UTF-8 byte order mark at the
        start of the file is dropped instead of ending up in the first cell.
        Files without a BOM are read exactly as plain UTF-8.

    Example:
        ::

            # Auto-detect single CSV in current directory (deletes CSV by default)
            csv_to_xlsx()  # Finds, converts and deletes CSV
            # Specific file (deletes CSV by default)
            csv_to_xlsx('data.csv')  # Converts and deletes data.csv
            # Keep original CSV
            csv_to_xlsx('report.csv', delete_csv=False)  # Keeps report.csv
    """
    # Auto-detect CSV if not provided
    if csv_file is None:
        # Sorted so the listing below is printed in a stable order
        csv_files = sorted(Path('.').glob('*.csv'))
        if not csv_files:
            print("Error: No CSV files found in current directory")
            return None
        if len(csv_files) > 1:
            print(f"Error: Multiple CSV files found ({len(csv_files)}). Please specify which one to convert:")
            for f in csv_files:
                print(f" - {f.name}")
            return None
        csv_file = str(csv_files[0])
        print(f"Auto-detected CSV file: {csv_file}")
    try:
        new_wb = Workbook()
        new_ws = new_wb.active
        # newline='' lets the csv module handle line endings inside quoted fields
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.reader(f):
                new_ws.append(row)
        xlsx_file = Path(csv_file).with_suffix(".xlsx")
        new_wb.save(xlsx_file)
        print(f"Converted: {csv_file} to {xlsx_file.name}")
        # The CSV is removed only after the workbook was saved successfully
        if delete_csv:
            os.remove(csv_file)
            print(f"Deleted: {csv_file}")
        return str(xlsx_file)
    except Exception as e:
        print(f"Error converting CSV to XLSX: {e}")
        return None
[docs]
def date():
    """
    Return today's day of the month, based on local time.

    Returns:
        int: Current day (1-31)

    Example:
        date()  # 24
    """
    local_now = time.localtime()
    return local_now.tm_mday
[docs]
def day():
    """
    Return the name of the current weekday, based on local time.

    Returns:
        str: Day name in lowercase (monday, tuesday, wednesday, thursday, friday, saturday, sunday)

    Example:
        ::

            # Weekday check
            if day() == 'monday':
                print("It is Monday today.")
    """
    # tm_wday counts from 0 = Monday, which is the order of this table
    weekday_names = (
        'monday',
        'tuesday',
        'wednesday',
        'thursday',
        'friday',
        'saturday',
        'sunday',
    )
    weekday_index = time.localtime().tm_wday
    return weekday_names[weekday_index]
[docs]
def drag(*args):
    """
    Drag from a source to a target, on screen or inside a browser.

    Modes:
        1. PyAutoGUI screen drag: (x1, y1, x2, y2)
        2. Selenium element drag: (driver, src_type, src_selector, tgt_type, tgt_selector)

    Args:
        PyAutoGUI: (x1, y1, x2, y2)
        Selenium: (driver, src_type, src_selector, tgt_type, tgt_selector)

    Returns:
        True if successful, False otherwise

    Raises:
        ValueError: If the arguments match neither mode, or if a Selenium
            selector type is not one of the supported names.

    Output:
        - Prints drag confirmation showing source and target coordinates (PyAutoGUI).
        - Prints drag confirmation showing source and target selectors (Selenium).

    Example:
        ::

            # Screen drag (PyAutoGUI) - 2 second duration
            drag(100, 200, 500, 600)
            # Web element drag (Selenium)
            drag(driver, 'id', 'card-1', 'class', 'done-column')
            drag(driver, 'xpath', '//li[1]', 'xpath', '//li[5]')
            # Multiple drivers
            driver1 = browser('https://trello.com')
            driver2 = browser('https://jira.com')
            drag(driver1, 'id', 'task-1', 'id', 'done-column')
            drag(driver2, 'class', 'issue', 'class', 'backlog')
    """
    # ------------------------------------------------------------
    # Screen drag: exactly four integers, requires a display
    # ------------------------------------------------------------
    looks_like_coordinates = len(args) == 4 and all(isinstance(value, int) for value in args)
    if looks_like_coordinates:
        if not _GUI_AVAILABLE:
            print("Error: drag() requires a display.")
            return False
        x1, y1, x2, y2 = args
        try:
            # Move to the start point over 1 second, then drag with the left button for 2 seconds
            pyautogui.moveTo(x1, y1, 1, pyautogui.easeInOutQuad)
            pyautogui.dragTo(x2, y2, duration=2, button='left')
            print(f"Dragged from ({x1}, {y1}) to ({x2}, {y2})")
            return True
        except Exception as e:
            print(f"Error during drag: {e}")
            return False

    # ------------------------------------------------------------
    # Element drag: driver object plus source and target selectors
    # ------------------------------------------------------------
    if len(args) == 5 and hasattr(args[0], 'find_element'):
        driver_obj, src_type, src_selector, tgt_type, tgt_selector = args
        valid_selectors = ['id', 'xpath', 'class', 'name', 'css', 'tag', 'text', 'partial']
        # Selector types are validated up front (source first) and rejected with ValueError
        for role, chosen_type in (('source', src_type), ('target', tgt_type)):
            if chosen_type not in valid_selectors:
                raise ValueError(f"Invalid {role} selector type '{chosen_type}'. Valid: {', '.join(valid_selectors)}")
        try:
            source = _get_web_element(driver_obj, src_type, src_selector)
            target = _get_web_element(driver_obj, tgt_type, tgt_selector)
            if not source:
                print(f"Source element not found: {src_type} = '{src_selector}'")
                return False
            if not target:
                print(f"Target element not found: {tgt_type} = '{tgt_selector}'")
                return False
            ActionChains(driver_obj).drag_and_drop(source, target).perform()
            print(f"Dragged element from {src_type}='{src_selector}' to {tgt_type}='{tgt_selector}'")
            return True
        except Exception as e:
            print(f"Error during Selenium drag: {e}")
            return False

    # ---------------Invalid Arguments---------------
    raise ValueError(
        "Invalid arguments. Use drag(x1, y1, x2, y2) or drag(driver, src_type, src_selector, tgt_type, tgt_selector)")
[docs]
def dropdown_select(driver_obj, selector_type, selector, selection_criteria):
    """
    Chooses an option in a dropdown, either by position or by visible text.

    Args:
        driver_obj: Selenium WebDriver instance
        selector_type: Type of selector ('id', 'name', 'xpath', 'class', 'css', 'tag', 'text', 'partial')
        selector: The value of the selector
        selection_criteria: Index (int) or visible text (str) for selection

    Returns:
        True if successful, False otherwise

    Output:
        - Prints confirmation showing the selected option index or text.

    Example:
        ::

            # Select by index
            dropdown_select(driver, 'id', 'country-dropdown', 0)  # Select first option
            dropdown_select(driver, 'id', 'country-dropdown', 2)  # Select third option
            # Select by visible text
            dropdown_select(driver, 'id', 'country-dropdown', 'United States')
            dropdown_select(driver, 'name', 'language', 'English')
            dropdown_select(driver, 'xpath', '//select[@name="city"]', 'New York')
            # Different selector types
            dropdown_select(driver, 'class', 'form-select', 'Option 1')
            dropdown_select(driver, 'css', 'select.dropdown', 'Value')
    """
    try:
        # Locate the dropdown element first; nothing can be selected without it
        menu_element = _get_web_element(driver_obj, selector_type, selector)
        if menu_element is None:
            print(f"Dropdown element not found: {selector_type} - {selector}")
            return False

        chooser = Select(menu_element)

        # The type of selection_criteria decides how the option is picked
        if isinstance(selection_criteria, int):
            chooser.select_by_index(selection_criteria)
            confirmation = f"Selected dropdown option by index: {selection_criteria}"
        elif isinstance(selection_criteria, str):
            chooser.select_by_visible_text(selection_criteria)
            confirmation = f"Selected dropdown option by text: '{selection_criteria}'"
        else:
            print("Invalid selection criteria. Must be an integer or string.")
            return False

        print(confirmation)
        return True
    except NoSuchElementException:
        print(f"Option not found in the dropdown: {selection_criteria}")
        return False
    except Exception as e:
        print(f"Error during dropdown selection: {e}")
        return False
[docs]
def erase(*args):
    """
    Erase/clear text from input fields.

    Modes:
        1. PyAutoGUI active window: ()
        2. Selenium specific element: (driver, selector_type, selector)

    Args:
        *args: Variable arguments depending on mode

    Returns:
        True if successful, False otherwise

    Note:
        In PyAutoGUI mode Ctrl+A is sent first and Delete is pressed
        afterwards as a separate key, so Delete is never combined with Ctrl.

    Example:
        ::

            # PyAutoGUI mode (erase active window)
            erase()  # Select all and delete (Ctrl+A, Delete)
            # Selenium mode (erase specific element)
            erase(driver, 'id', 'username')  # Clear username field
            erase(driver, 'xpath', '//input[@name="email"]')  # Clear email field
            erase(driver, 'class', 'search-box')  # Clear search box
    """
    try:
        # ------------------------------------------------------------
        # MODE 1 : PYAUTOGUI ACTIVE WINDOW (no arguments)
        # needs: display
        # ------------------------------------------------------------
        if len(args) == 0:
            if not _GUI_AVAILABLE:
                print("Error: erase() requires a display.")
                return False
            # hotkey() holds every listed key down together, so passing
            # 'delete' to it would send Ctrl+Delete. Select all first, then
            # press Delete on its own.
            pyautogui.hotkey('ctrl', 'a')
            pyautogui.press('delete')
            print("Erased content in active window")
            return True
        # ------------------------------------------------------------
        # MODE 2 : SELENIUM ELEMENT (driver + selector)
        # ------------------------------------------------------------
        elif len(args) == 3 and hasattr(args[0], 'find_element'):
            driver_obj = args[0]
            selector_type = args[1]
            selector = args[2]
            element = _get_web_element(driver_obj, selector_type, selector)
            if element:
                element.clear()
                print(f"Cleared element: {selector_type} - {selector}")
                return True
            else:
                print(f"Element not found: {selector_type} - {selector}")
                return False
        # ---------------Invalid Arguments---------------
        else:
            print("Error: Invalid arguments for erase()")
            print("Use: erase() or erase(driver, selector_type, selector)")
            return False
    except Exception as e:
        print(f"Error erasing content: {e}")
        return False
[docs]
def find_browser(*args):
    """
    Find text in browser using Ctrl+F (PyAutoGUI) or page highlighting (Selenium).

    Args:
        *args: Variable arguments depending on mode

    Returns:
        True if successful, False otherwise

    Output:
        - Prints confirmation with the searched text (PyAutoGUI).
        - Prints confirmation if text was found and highlighted (Selenium).
        - Prints a message if text was not found on the page (Selenium).

    Example:
        ::

            # PyAutoGUI mode (any window)
            find_browser('Python')  # Find in active window
            find_browser('error message')  # Find phrase
            # Selenium mode (browser)
            find_browser(driver, 'Python')  # Find in Selenium browser
            find_browser(driver, 'contact us')  # Find phrase in browser

    Note:
        - PyAutoGUI: Opens find dialog (Ctrl+F), types search term, presses Enter, then Esc.
        - Selenium: Uses JavaScript to wrap matching text in yellow <mark> elements
          and scrolls to the first match. Removes highlights from a previous call
          before applying new ones.
        - Selenium: The search text is matched literally and case-insensitively.
          Only text nodes are searched, so tags and attributes are never modified.
          An empty search text is reported as not found.
        - Default wait time between actions is 1 second (PyAutoGUI only).
    """
    wait_time = 1  # Default wait time (1 second)
    try:
        # ------------------------------------------------------------
        # MODE 1 : SELENIUM (driver object passed)
        # ------------------------------------------------------------
        if len(args) >= 2 and hasattr(args[0], 'execute_script'):
            driver_obj = args[0]
            search_text = args[1]
            # Raw string so the JavaScript backslashes reach the browser unchanged.
            # The script edits text nodes instead of rewriting body.innerHTML:
            # rewriting the HTML would match inside tags/attributes and would
            # discard event listeners attached to the existing elements.
            script = r"""
            var searchText = arguments[0];
            var MARKER = 'data-autocore-find';

            // Remove highlights left by a previous call, restoring plain text
            var previous = document.querySelectorAll('mark[' + MARKER + ']');
            for (var i = 0; i < previous.length; i++) {
                var oldMark = previous[i];
                var holder = oldMark.parentNode;
                if (!holder) { continue; }
                holder.replaceChild(document.createTextNode(oldMark.textContent), oldMark);
                holder.normalize();
            }

            if (searchText === null || searchText === undefined || String(searchText) === '') {
                return false;
            }

            // Escape regex metacharacters so the text is matched literally
            var escaped = String(searchText).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            var regex = new RegExp(escaped, 'gi');

            // Collect matching text nodes first; the DOM must not change while walking it
            var skipped = {SCRIPT: true, STYLE: true, NOSCRIPT: true, TEXTAREA: true};
            var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null);
            var targets = [];
            var node;
            while ((node = walker.nextNode())) {
                var owner = node.parentNode;
                if (!owner || skipped[owner.nodeName]) { continue; }
                regex.lastIndex = 0;
                if (regex.test(node.nodeValue)) { targets.push(node); }
            }

            // Replace each matching text node by text pieces and <mark> elements
            var firstMatch = null;
            for (var t = 0; t < targets.length; t++) {
                var textNode = targets[t];
                var text = textNode.nodeValue;
                var pieces = document.createDocumentFragment();
                var consumed = 0;
                var hit;
                regex.lastIndex = 0;
                while ((hit = regex.exec(text)) !== null) {
                    if (hit.index > consumed) {
                        pieces.appendChild(document.createTextNode(text.slice(consumed, hit.index)));
                    }
                    var mark = document.createElement('mark');
                    mark.setAttribute(MARKER, '1');
                    mark.style.backgroundColor = 'yellow';
                    mark.textContent = hit[0];
                    pieces.appendChild(mark);
                    if (!firstMatch) { firstMatch = mark; }
                    consumed = hit.index + hit[0].length;
                }
                if (consumed < text.length) {
                    pieces.appendChild(document.createTextNode(text.slice(consumed)));
                }
                textNode.parentNode.replaceChild(pieces, textNode);
            }

            // Scroll to first match
            if (firstMatch) {
                firstMatch.scrollIntoView({ behavior: 'smooth', block: 'center' });
                return true;
            }
            return false;
            """
            # Pass search_text as a script argument to avoid string interpolation issues
            result = driver_obj.execute_script(script, search_text)
            if result:
                print(f"[Selenium] Text '{search_text}' found and highlighted")
                return True
            else:
                print(f"[Selenium] Text '{search_text}' not found on page")
                return False
        # ------------------------------------------------------------
        # MODE 2 : PYAUTOGUI (no driver object)
        # needs: display
        # ------------------------------------------------------------
        elif len(args) == 1:
            if not _GUI_AVAILABLE:
                print("Error: find_browser() requires a display.")
                return False
            search_text = args[0]
            # Open browser find dialog
            pyautogui.hotkey('ctrl', 'f')
            time.sleep(wait_time)
            # Type search text
            pyautogui.typewrite(search_text)
            time.sleep(wait_time)
            # Press Enter to find
            pyautogui.press('enter')
            time.sleep(wait_time)
            # Close find dialog
            pyautogui.press('esc')
            time.sleep(wait_time)
            print(f"[PyAutoGUI] Searched for '{search_text}'")
            return True
        # ---------------Invalid Arguments---------------
        else:
            print("Error: Invalid arguments for find_browser()")
            print("Use: find_browser(text) or find_browser(driver, text)")
            return False
    except Exception as e:
        print(f"Error during browser find: {e}")
        return False
[docs]
def find_key(data, key):
    """
    Collects every value stored under a given key anywhere inside nested
    dictionaries, lists and tuples. Handy for digging through deeply nested
    JSON data from API responses or parsed files.

    Args:
        data: Data structure to search (dict, list or tuple)
        key: Key name to find

    Returns:
        list: All values found for the key (empty list if not found)

    Example:
        ::

            # Single occurrence
            data = {'name': 'John', 'age': 30}
            name = find_key(data, 'name')[0]  # 'John'
            # Multiple occurrences
            data = {
                'user': {'id': 1, 'name': 'Alice'},
                'admin': {'id': 2, 'name': 'Bob'}
            }
            ids = find_key(data, 'id')  # [1, 2]
            # Nested lists/tuples
            data = {'users': [{'age': 25}, {'age': 30}]}
            ages = find_key(data, 'age')  # [25, 30]
            # API response workflow
            response = requests.get('https://api.example.com/users').json()
            ids = find_key(response, 'id')  # finds all 'id' values
            # Parsed file workflow
            data = json.loads(read('data.json'))
            hosts = find_key(data, 'host')  # finds all 'host' values
    """
    containers = (dict, list, tuple)

    def _matches(node):
        """Yield matching values depth-first, in the order they are met."""
        if isinstance(node, dict):
            for name, value in node.items():
                # A matching value is reported before its own contents are searched
                if name == key:
                    yield value
                if isinstance(value, containers):
                    yield from _matches(value)
        elif isinstance(node, (list, tuple)):
            for element in node:
                yield from _matches(element)

    return list(_matches(data))
[docs]
def find_str(string, starts_after, ends_before, index=0):
    """
    Returns the text found between a start marker and an end marker.

    Both markers are matched literally. The shortest possible text between
    them is taken and surrounding whitespace is stripped from the result.

    Args:
        string: Text to search in
        starts_after: Start extraction after this
        ends_before: End extraction before this
        index: Which match (0=first, -1=last, 1=second, etc.)

    Returns:
        str or None: Extracted string or None if not found

    Example:
        ::

            # Extract version number from string
            text = 'Version: 1.0.5 released'
            version = find_str(text, 'Version: ', ' released')
            # version = '1.0.5'
            # Extract last occurrence using index=-1
            text = 'User: Alice logged in. User: Bob logged in'
            last_user = find_str(text, 'User: ', ' logged', -1)
            # last_user = 'Bob'
    """
    try:
        # Escape the markers so regex metacharacters in them are taken literally
        opening = re.escape(starts_after)
        closing = re.escape(ends_before)
        finder = re.compile(opening + '(.*?)' + closing)
        captured = [hit.group(1) for hit in finder.finditer(string)]
        if len(captured) == 0:
            return None
        try:
            chosen = captured[index]
        except IndexError:
            # Requested occurrence does not exist
            return None
        return chosen.strip()
    except re.error as e:
        print(f"Regex error in find_str: {e}")
        return None
    except Exception as e:
        print(f"Error in find_str: {e}")
        return None
[docs]
def hour():
    """
    Return the current hour of the day, based on local time.

    Returns:
        int: Current hour (0-23) in 24-hour format

    Example:
        hour()  # 14 (2 PM)
    """
    local_now = time.localtime()
    return local_now.tm_hour
[docs]
def inspect():
    """
    Opens GUI to inspect pixel position and color with zoomed preview.

    A small Tk window shows the mouse position, the RGB/HEX color under the
    cursor, a color swatch and an 8x zoom of the 30x30 pixel area around the
    cursor. The preview is refreshed every 30 ms until ESC is pressed.

    Usage:
        1. Click on the Pixel Inspector window to bring it into focus.
        2. Move the mouse to the desired pixel.
        3. Press 'ESC' to capture.

    Output:
        - Prints position and RGB/HEX values to console.
        - Copies 'x, y, r, g, b' to clipboard.

    Returns:
        None. Returns early, after printing an error, when no display or no
        clipboard manager is available.
    """
    # ------------------------------------------------------------
    # GUARD : needs display, tkinter, pyautogui and clipboard
    # NOTE(review): tk, ImageTk, pyautogui and pyperclip are not imported in
    # this block; presumably they are bound at module level when the GUI is
    # available - confirm against the import section.
    # ------------------------------------------------------------
    if not _GUI_AVAILABLE:
        print("Error: inspect() requires a display.")
        return
    if not _CLIPBOARD_AVAILABLE:
        print("Error: inspect() requires a clipboard manager.")
        return
    # Track whether the inspector window is still open
    window_open = True

    def capture_pixel(event=None):
        """Called when ESC is pressed. Captures current mouse position and color."""
        nonlocal window_open
        try:
            # Get current mouse position
            x, y = pyautogui.position()
            # Get the RGB color of the pixel at current mouse position
            color = pyautogui.pixel(x, y)
            # Convert RGB to HEX format
            hex_color = f"#{color[0]:02x}{color[1]:02x}{color[2]:02x}".upper()
            # Format the full output string for console
            formatted_output = f"Pixel at ({x}, {y}): RGB {color} | HEX {hex_color}"
            # Format the clipboard string as 'x, y, r, g, b'
            clipboard_text = f"{x}, {y}, {color[0]}, {color[1]}, {color[2]}"
            # Print to console
            print(formatted_output)
            print(f"Copied to clipboard: {clipboard_text}")
            # Copy to clipboard
            pyperclip.copy(clipboard_text)
            # Mark window as closed and destroy the tkinter window
            window_open = False
            root.destroy()
        except Exception as e:
            # On failure the window stays open so the capture can be retried
            print(f"Error capturing pixel: {e}")

    # noinspection PyTypeChecker
    def update_color_and_position():
        """Called repeatedly every 30 ms to update the live preview."""
        # Stop updating if the window has been closed
        if not window_open:
            return
        try:
            # Get current mouse position
            x, y = pyautogui.position()
            # Get screen dimensions to validate mouse position
            screen_width, screen_height = pyautogui.size()
            # Skip update if mouse is outside screen bounds, but keep the loop alive
            if x < 0 or y < 0 or x >= screen_width or y >= screen_height:
                root.after(30, update_color_and_position)
                return
            # Get RGB color of the pixel at current mouse position
            color = pyautogui.pixel(x, y)
            # Convert RGB to HEX format
            hex_color = f"#{color[0]:02x}{color[1]:02x}{color[2]:02x}".upper()
            # Update the position, RGB and HEX labels in the GUI
            position_label.config(text=f"Position: ({x}, {y})")
            color_label.config(text=f"RGB: {color}")
            hex_label.config(text=f"HEX: {hex_color}")
            # Update the color swatch to show the current pixel color
            color_display.config(bg=hex_color.lower())
            # --- Zoomed Preview ---
            # Define the capture area size around the cursor (30x30 pixels)
            zoom_size = 30
            # Calculate top-left corner of the capture region centered on cursor
            left = max(0, x - zoom_size // 2)
            top = max(0, y - zoom_size // 2)
            # Clamp the region so it does not go beyond screen edges
            # (near an edge the cursor is then no longer at the region's center)
            left = min(left, screen_width - zoom_size)
            top = min(top, screen_height - zoom_size)
            # Take a screenshot of the small region around the cursor
            screenshot = pyautogui.screenshot(region=(left, top, zoom_size, zoom_size))
            # Resize the region to 240x240 for 8x magnification using nearest
            # neighbor to keep pixels sharp and not blurred
            zoomed = screenshot.resize((240, 240), Image.Resampling.NEAREST)
            # Convert the zoomed image to a format tkinter can display
            photo = ImageTk.PhotoImage(zoomed)
            # Clear the canvas and draw the new zoomed image
            zoom_canvas.delete("all")
            zoom_canvas.create_image(120, 120, image=photo)
            # Keep a reference to prevent garbage collection
            zoom_canvas.image = photo
            # Draw a red crosshair at the center of the zoomed preview
            zoom_canvas.create_line(120, 110, 120, 130, fill='red', width=2)  # vertical
            zoom_canvas.create_line(110, 120, 130, 120, fill='red', width=2)  # horizontal
        except Exception as e:
            # A failed update is reported and ends the refresh loop:
            # nothing is rescheduled on this path
            print(f"Pixel preview update failed: {e}")
            return
        # Schedule the next update after 30 ms
        root.after(30, update_color_and_position)

    # --- Build the tkinter GUI ---
    # Create the main window
    root = tk.Tk()
    root.title("Pixel Inspector")
    root.geometry("400x560")
    root.configure(bg='#4D4D4D')
    # Bind ESC key to capture_pixel so pressing ESC triggers the capture
    root.bind('<Escape>', capture_pixel)
    # Label to show current mouse position
    position_label = tk.Label(root, text="Position:", font=("Helvetica", 12), bg='#4D4D4D', fg='white')
    position_label.pack()
    # Label to show current RGB values
    color_label = tk.Label(root, text="RGB:", font=("Helvetica", 12), bg='#4D4D4D', fg='white')
    color_label.pack()
    # Label to show current HEX value
    hex_label = tk.Label(root, text="HEX:", font=("Helvetica", 12), bg='#4D4D4D', fg='white')
    hex_label.pack()
    # Color swatch frame that fills with the current pixel color
    color_display = tk.Frame(root, height=60, width=200)
    color_display.pack(pady=10)
    # Label above the zoomed preview canvas
    zoom_label = tk.Label(root, text="Zoomed Preview (8x):", font=("Helvetica", 10), bg='#4D4D4D', fg='white')
    zoom_label.pack()
    # Canvas where the zoomed pixel preview is drawn
    zoom_canvas = tk.Canvas(root, width=240, height=240, bg='#2D2D2D', highlightthickness=2,
                            highlightbackground='white')
    zoom_canvas.pack(pady=5)
    # Instruction label at the bottom
    instruction_label = tk.Label(root, text="Press 'ESC' to capture and exit", font=("Helvetica", 10),
                                 bg='#4D4D4D', fg='white')
    instruction_label.pack(pady=10)
    # Start the live update loop (the widgets it reads all exist by now)
    update_color_and_position()
    # Start the tkinter event loop : blocks here until window is closed
    root.mainloop()
[docs]
def log_setup(title):
"""
Sets up logging and terminal styling for the script.
Combines terminal setup with comprehensive logging and automatic color-coded status indication.
Creates a logs folder and saves all output with timestamps.
Shows output in terminal while also saving to file.
Args:
title: Name for both terminal title and log file
Example:
::
log_setup("MyScript")
print("This gets logged")
# ... script runs ...
# Terminal turns GREEN on success or RED on crash
Log file format:
::
logs/log_MyScript_2026-03-26_14-30-45_IST_session-1.txt (active - newest logs)
logs/log_MyScript_2026-03-26_14-30-45_IST_session-1_part_1.txt (2nd newest - rotated)
logs/log_MyScript_2026-03-26_14-30-45_IST_session-1_part_2.txt (3rd newest)
...
logs/log_MyScript_2026-03-26_14-30-45_IST_session-1_part_9.txt (oldest backup)
Session numbering:
::
session-1 : First run of this script
session-2 : Second run of this script
session-3 : Third run, etc.
session-N : Automatically increments based on existing log files
Features:
- Sets terminal title and colors (blue bg, white text)
- Automatic color changes: Blue to Green (success) or Blue to Red (crash)
- Automatic session numbering (increments from previous runs)
- Captures all print() statements
- Captures all errors and exceptions
- Adds timestamp to each entry
- Shows output in terminal AND saves to file
- Automatic file rotation (10MB per file, max 10 files = 100MB per session)
- Automatic cleanup (keeps max 100MB total logs across all sessions)
Note:
Terminal colors change automatically based on script outcome::
Blue background : Script is running
Green background : Script completed successfully
Red background : Script crashed (unhandled exception)
"""
global _log_file_handler, _original_stdout, _original_stderr, _log_folder
# ---------- TERMINAL SETUP ----------
system = platform.system()
if system == "Windows":
# Clear the screen
os.system('cls')
# Set the window title (works in both CMD and PowerShell)
os.system(f'title {title}')
# Try CMD color command first (Blue background, white text)
result = os.system('color 1F')
# If color command failed (PowerShell), use ANSI escape codes
if result != 0:
# Enable ANSI color support in PowerShell
print("\033[97m\033[44m", end="", flush=True)
elif system == "Linux":
# Set the window title using ANSI escape codes
print(f"\33]0;{title}\a", end="", flush=True)
# Set terminal text color to bright white and background to blue
# ANSI: 97 = bright white text, 44 = blue background
print("\033[97m\033[44m", end="")
# Clear the screen
os.system('clear')
elif system == "Darwin":
print("Terminal colors not supported on macOS (logging will still work)")
# ---------- LOGGING SETUP ----------
# Create logs folder if it doesn't exist
_log_folder.mkdir(exist_ok=True)
# ---------- DETERMINE SESSION NUMBER ----------
# Find all existing log files for this script title
existing_logs = list(_log_folder.glob(f"log_{title}_*_session-*.txt"))
# Extract session numbers from existing log files
session_numbers = []
for log_file in existing_logs:
# Pattern: log_title_timestamp_timezone_session-N.txt
filename = log_file.stem # Get filename without extension
# Extract session number using regex
match = re.search(r'_session-(\d+)', filename)
if match:
session_numbers.append(int(match.group(1)))
# Determine next session number
if session_numbers:
next_session = max(session_numbers) + 1
else:
next_session = 1
print(f"Starting session {next_session} for '{title}'")
# ---------- GET TIMESTAMP WITH TIMEZONE ----------
# Get current time with system's local timezone
try:
now = datetime.datetime.now().astimezone() # auto-detects system timezone
time_zone_name = now.tzname() or "LOCAL" # e.g., IST, EST, PDT
except Exception:
now = datetime.datetime.now()
time_zone_name = "LOCAL"
# Create log filename with session number
timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
log_filename = f"log_{title}_{timestamp}_{time_zone_name}_session-{next_session}.txt"
log_filepath = _log_folder / log_filename
# ---------- CONFIGURE LOGGER ----------
# Configure logger
logger = logging.getLogger(title)
logger.setLevel(logging.DEBUG)
# Remove existing handlers
logger.handlers.clear()
# Create file handler with rotation (10MB per file)
_log_file_handler = _CustomRotatingFileNameHandler(
log_filepath,
maxBytes=10 * 1024 * 1024, # 10MB
backupCount=9, # 10 files total (1 active + 9 backups) = 100MB
encoding='utf-8' # fixes UnicodeEncodeError for special characters
)
# Create formatter with timestamp
formatter = logging.Formatter(
'%(asctime)s -> %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
_log_file_handler.setFormatter(formatter)
logger.addHandler(_log_file_handler)
# Silence the root logger for the entire session so third-party libraries
# that add StreamHandlers to it cannot produce duplicate "INFO:..." lines in our logs.
# Setting level above CRITICAL blocks all messages at the source,
# regardless of how many handlers any library adds to the root logger.
# Our named logger is unaffected as it has its own independent level.
logging.getLogger().handlers.clear()
logging.getLogger().addHandler(logging.NullHandler())
logging.getLogger().setLevel(logging.CRITICAL + 1)
# Redirect stdout and stderr
_original_stdout = sys.stdout
_original_stderr = sys.stderr
sys.stdout = _LogCapture(_original_stdout, logger, logging.INFO)
sys.stderr = _LogCapture(_original_stderr, logger, logging.ERROR)
# Log start message
logger.info(f"Logging started for: {title} (Session {next_session})")
logger.info(f"Log file: {log_filepath}")
# ---------- SETUP SUCCESS/ERROR COLOR HANDLERS ----------
global _script_had_error # Declare at start of this section
# Store original exception handler
original_excepthook = sys.excepthook
def set_success_color():
"""Change terminal to green on successful completion"""
global _original_stdout, _original_stderr, _script_had_error, _log_file_handler
# Don't show success if there was an error
if _script_had_error:
return
# LOG SUCCESS MESSAGE directly to file (before restoring streams)
if _log_file_handler:
try:
# Create a log record manually
record = logging.LogRecord(
name="completion",
level=logging.INFO,
pathname="",
lineno=0,
msg="Script execution completed successfully",
args=(),
exc_info=None
)
_log_file_handler.emit(record)
_log_file_handler.flush() # Ensure it's written
except Exception as e:
print(f"Could not write script completion status to log file: {e}")
# NOW restore original stdout/stderr
if _original_stdout:
sys.stdout = _original_stdout
sys.stderr = _original_stderr
# Change color (terminal only)
if platform.system() == "Windows":
os.system('color 2F') # Green background, white text
elif platform.system() == "Linux":
print("\033[97m\033[42m", end="", flush=True) # Green bg, white text
def set_error_color(exc_type, exc_value, exc_traceback):
"""Change terminal to red on unhandled exception"""
global _original_stdout, _original_stderr, _script_had_error
# Mark that an error occurred
_script_had_error = True
# LOG THE EXCEPTION FIRST (while stderr is still redirected to log)
error_msg = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
print(error_msg, file=sys.stderr, end='')
# NOW restore original stdout/stderr
if _original_stdout:
sys.stdout = _original_stdout
sys.stderr = _original_stderr
# Change color
if platform.system() == "Windows":
os.system('color 4F') # Red background, white text
elif platform.system() == "Linux":
print("\033[97m\033[41m", end="", flush=True) # Red bg, white text
# DON'T call original_excepthook - we already printed the error!
# (Calling it would print the error a second time)
# Register success handler (runs on normal exit)
atexit.register(set_success_color)
# Register error handler (runs on unhandled exception)
sys.excepthook = set_error_color
[docs]
def minute():
    """
    Return the minute component of the current local time.

    Returns:
        int: Minute of the hour (0-59)

    Example:
        minute()  # 23
    """
    local_now = time.localtime()
    return local_now.tm_min
[docs]
def month():
    """
    Return the month component of the current local date.

    Returns:
        int: Month of the year (1-12)

    Example:
        month()  # 2 (February)
    """
    local_now = time.localtime()
    return local_now.tm_mon
[docs]
def press(*keys):
    """
    Press keyboard keys with support for Selenium, PyAutoGUI and key combinations.

    Modes:
        1. PyAutoGUI single key: (key)
        2. PyAutoGUI key N times: (key, count)
        3. PyAutoGUI key combination: (key1, key2, ...)
        4. Selenium driver key: (driver, key)
        5. Selenium driver key N times: (driver, key, count)
        6. Selenium driver key combination: (driver, key1, key2, ...)
        7. Selenium element key: (driver, selector_type, selector, key)

    Selenium mode is selected when the first argument has a ``find_element``
    attribute; everything else is handled by PyAutoGUI.

    Args:
        *keys: Variable arguments for key presses (see Modes)

    Returns:
        bool: True if the keys were sent. False when no key was given, a key is
        invalid or unsupported in the chosen mode, the element was not found,
        no display is available (PyAutoGUI modes) or any exception occurred.

    Example:
        ::

            # PyAutoGUI keys (no driver needed)
            press("tab")                    # Single key
            press("tab", 5)                 # Press 5 times
            press("tab", -5)                # Press 5 times with shift held
            press("ctrl", "a")              # Two-key combo
            press("alt", "ctrl", "z")       # Three-key combo
            press("num5")                   # Numpad 5
            press("volumeup")               # Volume up
            press("mute")                   # Volume mute (short form)
            press("back")                   # Browser back (short form)
            press("forward")                # Browser forward (short form)

            # Selenium driver keys (pass driver object)
            press(driver, "tab")
            press(driver, "tab", 5)         # Press tab 5 times
            press(driver, "tab", -5)        # Press shift+tab 5 times
            press(driver, "ctrl", "c")
            press(driver, "ctrl", "shift", "s")

            # Selenium element + key (driver, selector_type, selector, key)
            press(driver, "xpath", "//input", "enter")
            press(driver, "id", "username", "tab")

    Note:
        - Negative count presses the key with Shift held (e.g. Shift+Tab for reverse navigation).
        - Repeated presses pause one second between each press.
        - PyAutoGUI-only keys (num0-9, volumeup, volumedown, mute, back, forward, etc.)
          are not supported in Selenium mode.
        - Short forms supported: 'back' for browserback, 'forward' for browserforward,
          'mute' for volumemute.
    """
    # Nothing to press: without this guard the call would reach an empty
    # pyautogui.hotkey() and report success.
    if not keys:
        print("Error: press() requires at least one key.")
        return False

    # Key mappings for special keys (Selenium)
    key_map = {
        'enter': Keys.RETURN,
        'tab': Keys.TAB,
        'up': Keys.ARROW_UP,
        'down': Keys.ARROW_DOWN,
        'left': Keys.ARROW_LEFT,
        'right': Keys.ARROW_RIGHT,
        'alt': Keys.ALT,
        'backspace': Keys.BACKSPACE,
        'esc': Keys.ESCAPE,
        'escape': Keys.ESCAPE,
        'home': Keys.HOME,
        'end': Keys.END,
        'insert': Keys.INSERT,
        'delete': Keys.DELETE,
        'pageup': Keys.PAGE_UP,
        'pagedown': Keys.PAGE_DOWN,
        'shift': Keys.SHIFT,
        'ctrl': Keys.CONTROL,
        'control': Keys.CONTROL,
        'space': Keys.SPACE,
        'pause': Keys.PAUSE,
        'f1': Keys.F1, 'f2': Keys.F2, 'f3': Keys.F3, 'f4': Keys.F4,
        'f5': Keys.F5, 'f6': Keys.F6, 'f7': Keys.F7, 'f8': Keys.F8,
        'f9': Keys.F9, 'f10': Keys.F10, 'f11': Keys.F11, 'f12': Keys.F12,
        'windows': Keys.COMMAND,
        'command': Keys.COMMAND
    }
    # Keys that are held down (not typed) when part of a Selenium combination
    selenium_modifiers = (Keys.CONTROL, Keys.SHIFT, Keys.ALT, Keys.COMMAND)
    # PyAutoGUI-only keys (not supported in Selenium)
    pyautogui_only_keys = {
        'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9',
        'numlock', 'volumeup', 'volumedown', 'volumemute', 'playpause',
        'browserback', 'browserforward', 'printscreen', 'capslock', 'scrolllock',
        # Short forms
        'back', 'forward', 'mute'
    }
    # Short form mappings for PyAutoGUI keys
    pyautogui_short_forms = {
        'back': 'browserback',
        'forward': 'browserforward',
        'mute': 'volumemute'
    }
    # Valid PyAutoGUI keys (comprehensive list) to check valid keys
    valid_pyautogui_keys = {
        '\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(',
        ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
        'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~',
        'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace',
        'browserback', 'browserfavorites', 'browserforward', 'browserhome',
        'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear',
        'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete',
        'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute', 'f1', 'f10',
        'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f2', 'f20',
        'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9',
        'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja',
        'kana', 'kanji', 'launchapp1', 'launchapp2', 'launchmail',
        'launchmediaselect', 'left', 'modechange', 'multiply', 'nexttrack',
        'nonconvert', 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6',
        'num7', 'num8', 'num9', 'numlock', 'pagedown', 'pageup', 'pause', 'pgdn',
        'pgup', 'playpause', 'prevtrack', 'print', 'printscreen', 'prntscrn',
        'prtsc', 'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator',
        'shift', 'shiftleft', 'shiftright', 'sleep', 'space', 'stop', 'subtract', 'tab',
        'up', 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft', 'winright', 'yen',
        'command', 'option', 'optionleft', 'optionright',
        # Short forms
        'back', 'forward', 'mute'
    }

    def _resolve_gui_key(raw_key):
        """Return the PyAutoGUI name for raw_key, or None when it is not a valid key."""
        if not isinstance(raw_key, str):
            return None
        resolved = pyautogui_short_forms.get(raw_key.lower(), raw_key)
        # Single characters are passed through without a lookup
        if len(resolved) > 1 and resolved.lower() not in valid_pyautogui_keys:
            return None
        return resolved

    try:
        # ------------------------------------------------------------
        # SELENIUM MODE - first arg is a WebDriver-like object
        # needs: nothing : works on all platforms
        # ------------------------------------------------------------
        if hasattr(keys[0], 'find_element'):
            driver_obj = keys[0]

            # ----------------------------------------------------------
            # SELENIUM ELEMENT + KEY (driver, selector_type, selector, key)
            # ----------------------------------------------------------
            if len(keys) >= 4 and keys[1] in ('id', 'name', 'xpath', 'text', 'partial', 'tag', 'class', 'css'):
                selector_type = keys[1]
                selector = keys[2]
                key_name = keys[3].lower()
                if key_name in pyautogui_only_keys:
                    print(f"Warning: '{key_name}' key not supported in Selenium. Use PyAutoGUI instead.")
                    return False
                # Unmapped keys are sent exactly as given, like the driver modes below
                key = key_map.get(key_name, keys[3])
                element = _get_web_element(driver_obj, selector_type, selector)
                if element:
                    element.send_keys(key)
                    return True
                print(f"Element not found with {selector_type}: {selector}")
                return False

            # ----------------------------------------------------------
            # SELENIUM DRIVER KEYS (driver + keys)
            # ----------------------------------------------------------
            # Driver without any key: report failure instead of returning None
            if len(keys) == 1:
                print("Error: press() needs at least one key after the driver.")
                return False

            # Single key on driver: press(driver, "tab")
            if len(keys) == 2:
                key_name = keys[1].lower()
                if key_name in pyautogui_only_keys:
                    print(
                        f"Warning: '{key_name}' key not supported in Selenium. Use PyAutoGUI press('{key_name}') instead.")
                    return False
                key = key_map.get(key_name, keys[1])  # Use mapping or raw string
                ActionChains(driver_obj).send_keys(key).perform()
                return True

            # Key + count: press(driver, "tab", 5)
            if len(keys) == 3 and isinstance(keys[2], int):
                count = keys[2]
                key_name = keys[1].lower()
                if key_name in pyautogui_only_keys:
                    print(f"Warning: '{key_name}' key not supported in Selenium.")
                    return False
                key = key_map.get(key_name, keys[1])
                for _ in range(abs(count)):
                    # A fresh chain per press guarantees earlier actions are never
                    # replayed, whether or not perform() clears the chain.
                    chain = ActionChains(driver_obj)
                    if count < 0:
                        # Negative count: press with shift held
                        chain.key_down(Keys.SHIFT).send_keys(key).key_up(Keys.SHIFT)
                    else:
                        chain.send_keys(key)
                    chain.perform()
                    time.sleep(1)
                return True

            # Multiple keys (combination): press(driver, "ctrl", "c")
            combo = keys[1:]
            for raw_key in combo:
                if raw_key.lower() in pyautogui_only_keys:
                    print(f"Warning: '{raw_key}' key not supported in Selenium.")
                    return False
            resolved_combo = [key_map.get(raw_key.lower(), raw_key) for raw_key in combo]
            action = ActionChains(driver_obj)
            # Hold modifiers down in order; type regular keys as they appear
            for key in resolved_combo:
                if key in selenium_modifiers:
                    action.key_down(key)
                else:
                    action.send_keys(key)
            # Release all modifier keys in reverse order
            for key in reversed(resolved_combo):
                if key in selenium_modifiers:
                    action.key_up(key)
            action.perform()
            return True

        # ------------------------------------------------------------
        # PYAUTOGUI MODE - No driver object
        # needs: display
        # ------------------------------------------------------------
        if not _GUI_AVAILABLE:
            print("Error: press() requires a display.")
            return False

        # Single key press
        if len(keys) == 1:
            actual_key = _resolve_gui_key(keys[0])
            if actual_key is None:
                print(f"Error: Invalid key '{keys[0]}'")
                return False
            pyautogui.press(actual_key)
            return True

        # Key + count (press n times)
        if len(keys) == 2 and isinstance(keys[1], int):
            count = keys[1]
            actual_key = _resolve_gui_key(keys[0])
            if actual_key is None:
                print(f"Error: Invalid key '{keys[0]}'")
                return False
            if count >= 0:
                # Positive count: press normally
                for _ in range(count):
                    pyautogui.press(actual_key)
                    time.sleep(1)
            else:
                # Negative count: press with shift held; always release shift,
                # even if a press fails, so the keyboard is not left stuck
                pyautogui.keyDown('shift')
                try:
                    for _ in range(abs(count)):
                        pyautogui.press(actual_key)
                        time.sleep(1)
                finally:
                    pyautogui.keyUp('shift')
            return True

        # Multiple key combination (hotkey)
        translated_keys = []
        for key in keys:
            translated_key = _resolve_gui_key(key)
            if translated_key is None:
                print(f"Error: Invalid key '{key}' in combination")
                return False
            translated_keys.append(translated_key)
        pyautogui.hotkey(*translated_keys)
        return True
    except Exception as e:
        print(f"Error pressing keys {keys}: {e}")
        return False
[docs]
def read(*args):
    """
    Extract text from screen (using OCR), files (by parsing file format) or a Selenium browser window.

    Modes:
        1. No arguments: OCR full screen
        2. 2 integers: OCR from (x, y) to bottom-right corner
        3. 4 integers: OCR specific region (x, y, width, height)
        4. 1 string: Read file by parsing its format
        5. 1 driver object: Take screenshot of Selenium browser and read using OCR

    Supported file formats:
        - Documents: PDF, DOCX, PPTX, ODT, RTF
        - Tabular: CSV, TSV, XLSX, SQLite
        - Structured: JSON, YAML, XML, INI/CFG
        - Text: TXT, LOG, MD
        - Web: HTML
        - Email: EML, MSG
        - eBooks: EPUB
        - Scripts: SH, BAT, PY

    Args:
        *args: Variable arguments depending on mode

    Returns:
        str: Extracted text ("" when OCR finds no text), or None on invalid
        arguments, a missing display (screen modes), a missing/ambiguous file
        or any read error. Errors are printed, not raised.

    Example:
        ::

            # OCR - Read entire screen
            text = read()
            # OCR - Read from (100, 200) to bottom-right corner
            text = read(100, 200)
            # OCR - Read specific region: x=100, y=200, width=400, height=300
            text = read(100, 200, 400, 300)
            # Selenium - Read text from browser window using OCR
            d1 = browser('https://example.com')
            text = read(d1)
            # File - Read with extension
            text = read('report.pdf')
            text = read('data.csv')
            text = read('script.py')
            # File - Read without extension (auto-detects)
            text = read('report')   # Finds report.pdf automatically
            text = read('config')   # Finds config.yaml automatically
            # Check if text on screen
            if 'login' in read():
                print("Login visible!")

    Note:
        - OCR first run downloads models (~100MB), subsequent runs are fast.
        - Tabular formats print the first row as a header and every following
          row as ``>>>Row_N:``; SQLite output is limited to 100 rows per table.
    """
    # ------------------------------------------------------------
    # DETERMINE MODE AND VALIDATE ARGUMENTS
    # ------------------------------------------------------------
    is_ocr = False
    region = None  # None means full screen

    # MODE 1: OCR - No arguments (full screen)
    if len(args) == 0:
        is_ocr = True
    # MODE 2: OCR - 2 integers (from x,y to bottom-right corner)
    elif len(args) == 2 and all(isinstance(arg, int) for arg in args):
        is_ocr = True
        x, y = args
        if x < 0 or y < 0:
            print(f"Error: Coordinates must be non-negative (x={x}, y={y})")
            return None
        # The screen size query below needs a display, so guard it here
        # rather than letting it fail before the OCR guard is reached.
        if not _GUI_AVAILABLE:
            print("Error: read() screen/OCR modes require a display.")
            return None
        screen_width, screen_height = pyautogui.size()
        # Width and height reach from (x, y) to the bottom-right corner
        width = screen_width - x
        height = screen_height - y
        if width <= 0 or height <= 0:
            print(f"Error: Starting point ({x}, {y}) is outside screen bounds")
            return None
        region = (x, y, width, height)
    # MODE 3: OCR - 4 integers (screen region: x, y, width, height)
    elif len(args) == 4 and all(isinstance(arg, int) for arg in args):
        is_ocr = True
        x, y, width, height = args
        if width <= 0 or height <= 0:
            print(f"Error: Invalid region dimensions (width={width}, height={height})")
            return None
        if x < 0 or y < 0:
            print(f"Error: Coordinates must be non-negative (x={x}, y={y})")
            return None
        region = (x, y, width, height)

    # ------------------------------------------------------------
    # PERFORM OCR (modes 1, 2, 3 : needs display)
    # ------------------------------------------------------------
    if is_ocr:
        if not _GUI_AVAILABLE:
            print("Error: read() screen/OCR modes require a display.")
            return None
        try:
            # Get shared OCR reader
            reader = _get_ocr_reader()
            # Capture screenshot (region or full screen)
            if region:
                image = pyautogui.screenshot(region=region)
            else:
                image = pyautogui.screenshot()
            # Convert PIL image to numpy array
            image = np.array(image)
            cleaned = _preprocess_for_ocr(image)
            # detail=0 returns only the recognised strings
            results = reader.readtext(cleaned, detail=0)
            if not results:
                print("OCR: No text found.")
                return ""  # Blank string (not None) so callers can still use `in`
            return ' '.join(results).strip()
        except ImportError as e:
            print(f"Error: Required library not installed - {e}")
            print("Install with: pip install easyocr opencv-python")
            return None
        except Exception as e:
            print(f"Error during OCR: {e}")
            return None

    # ------------------------------------------------------------
    # MODE 5: Selenium driver - screenshot browser and OCR
    # needs: nothing for screenshot : OCR runs on bytes, no display needed
    # ------------------------------------------------------------
    elif len(args) == 1 and hasattr(args[0], 'find_element'):
        driver_obj = args[0]
        try:
            reader = _get_ocr_reader()
            # Get screenshot as bytes directly without saving to disk
            png_bytes = driver_obj.get_screenshot_as_png()
            image = np.array(Image.open(io.BytesIO(png_bytes)))
            cleaned = _preprocess_for_ocr(image)
            results = reader.readtext(cleaned, detail=0)
            if not results:
                print("OCR: No text found in browser window.")
                return ""
            return ' '.join(results).strip()
        except Exception as e:
            print(f"Could not read text from browser window: {e}")
            return None

    # ------------------------------------------------------------
    # MODE 4: File reading - 1 string (file path)
    # needs: nothing : safe on all platforms including servers
    # ------------------------------------------------------------
    elif len(args) == 1 and isinstance(args[0], str):
        file = args[0]

        # ------------------------------------------------------------
        # AUTO-DETECT FILE EXTENSION IF NOT PROVIDED
        # ------------------------------------------------------------
        if not os.path.splitext(file)[1]:  # No extension provided
            if os.path.dirname(file):
                directory = os.path.dirname(file)
                filename = os.path.basename(file)
            else:
                directory = '.'
                filename = file
            # Find files whose name without extension matches
            try:
                matching_files = [f for f in os.listdir(directory)
                                  if os.path.splitext(f)[0] == filename]
                if len(matching_files) == 1:
                    # Found exactly one file - use it
                    file = os.path.join(directory, matching_files[0])
                    print(f"Auto-detected file: {file}")
                elif len(matching_files) > 1:
                    print(f"Error: Multiple files found with name '{filename}':")
                    for f in matching_files:
                        print(f" - {f}")
                    print("Please specify file extension.")
                    return None
                else:
                    print(f"Error: No file found with name '{filename}'")
                    return None
            except Exception as e:
                print(f"Error searching for file: {e}")
                return None

        # Get file extension
        _, ext = os.path.splitext(file)
        ext = ext.lower()

        try:
            # ------------------------------------------------------------
            # DOCUMENTS
            # ------------------------------------------------------------
            # PDF files
            if ext == '.pdf':
                with open(file, 'rb') as pdf_file:
                    pdf_reader = PdfReader(pdf_file)
                    # `or ''` keeps the join safe if a page yields no text
                    return '\r\n'.join((page.extract_text() or '') for page in pdf_reader.pages)
            # Word documents
            elif ext in ['.docx', '.doc']:
                doc = Document(file)
                return '\n'.join([paragraph.text for paragraph in doc.paragraphs])
            # PowerPoint presentations
            elif ext in ['.pptx', '.ppt']:
                prs = Presentation(file)
                text = []
                for slide_num, slide in enumerate(prs.slides, 1):
                    text.append(f"=== Slide {slide_num} ===")
                    for shape in slide.shapes:
                        if hasattr(shape, "text"):
                            text.append(shape.text)
                    text.append('')
                return '\n'.join(text)
            # OpenDocument Text (LibreOffice)
            elif ext == '.odt':
                textdoc = odf_load(file)
                allparas = textdoc.getElementsByType(P)
                return '\n'.join([teletype.extractText(p) for p in allparas])
            # Rich Text Format
            elif ext == '.rtf':
                with open(file, 'r', encoding='utf-8', errors='ignore') as f:
                    return rtf_to_text(f.read())

            # ------------------------------------------------------------
            # TABULAR DATA (with >>>Row_X: format)
            # ------------------------------------------------------------
            # CSV and TSV files differ only in the delimiter
            elif ext in ['.csv', '.tsv']:
                delimiter = '\t' if ext == '.tsv' else ','
                # newline='' lets the csv module handle line breaks inside quoted fields
                with open(file, 'r', encoding='utf-8', errors='ignore', newline='') as f:
                    rows = list(csv.reader(f, delimiter=delimiter))
                if not rows:
                    return ''
                # First row as header
                text = ['\t'.join(rows[0]), '-' * 60]
                # Data rows with >>>Row_X: format
                for row_num, row in enumerate(rows[1:], 1):
                    row_data = '\t'.join(row)
                    text.append(f">>>Row_{row_num}:\t{row_data}")
                return '\n'.join(text)
            # Excel files
            elif ext in ['.xlsx', '.xlsm']:
                wb = load_workbook(file, data_only=True)
                text = []
                for sheet in wb.worksheets:
                    text.append(f"\n{'=' * 60}")
                    text.append(f"Sheet: {sheet.title}")
                    text.append('=' * 60)
                    rows = list(sheet.iter_rows(values_only=True))
                    if rows:
                        # First row as header (empty cells shown as blank)
                        text.append('\t'.join([str(cell) if cell is not None else '' for cell in rows[0]]))
                        text.append('-' * 60)
                        # Data rows with >>>Row_X: format (empty cells shown as NULL)
                        for row_num, row in enumerate(rows[1:], 1):
                            row_data = '\t'.join([str(cell) if cell is not None else 'NULL' for cell in row])
                            text.append(f">>>Row_{row_num}:\t{row_data}")
                    text.append('')
                return '\n'.join(text)
            # SQLite databases
            elif ext in ['.db', '.sqlite', '.sqlite3']:
                # sqlite3.connect() would silently create a missing file
                if not os.path.isfile(file):
                    raise FileNotFoundError(f"No such file: '{file}'")
                conn = sqlite3.connect(file)
                try:
                    cursor = conn.cursor()
                    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
                    tables = cursor.fetchall()
                    text = []
                    for table in tables:
                        table_name = table[0]
                        # Quote the identifier so names with spaces, quotes or
                        # keywords still form valid SQL
                        quoted_name = '"' + table_name.replace('"', '""') + '"'
                        text.append(f"\n{'=' * 60}")
                        text.append(f"Table: {table_name}")
                        text.append('=' * 60)
                        # Get column names
                        cursor.execute(f"PRAGMA table_info({quoted_name})")
                        columns = [col[1] for col in cursor.fetchall()]
                        text.append('\t'.join(columns))
                        text.append('-' * 60)
                        # Get data (limit to 100 rows)
                        cursor.execute(f"SELECT * FROM {quoted_name} LIMIT 100")
                        rows = cursor.fetchall()
                        # Add rows with >>>Row_X: format
                        for row_num, row in enumerate(rows, 1):
                            row_data = '\t'.join([str(cell) if cell is not None else 'NULL' for cell in row])
                            text.append(f">>>Row_{row_num}:\t{row_data}")
                        text.append(f"\nTotal_rows: {len(rows)}\n")
                finally:
                    # Release the database even when a query fails
                    conn.close()
                return '\n'.join(text)

            # ------------------------------------------------------------
            # STRUCTURED DATA
            # ------------------------------------------------------------
            # JSON files
            elif ext == '.json':
                with open(file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                return json.dumps(data, indent=2)
            # YAML files
            elif ext in ['.yaml', '.yml']:
                with open(file, 'r', encoding='utf-8') as f:
                    data = yaml.safe_load(f)
                return yaml.dump(data, default_flow_style=False, sort_keys=False)
            # XML files
            elif ext == '.xml':
                tree = et.parse(file)
                return et.tostring(tree.getroot(), encoding='unicode', method='xml')
            # INI/CFG files
            elif ext in ['.ini', '.cfg']:
                config = configparser.ConfigParser()
                # ConfigParser.read() skips unreadable files silently; it returns
                # the list of files it actually parsed
                if not config.read(file):
                    raise FileNotFoundError(f"Could not open config file: '{file}'")
                text = []
                for section in config.sections():
                    text.append(f"[{section}]")
                    for key, value in config.items(section):
                        text.append(f"{key} = {value}")
                    text.append('')
                return '\n'.join(text)

            # ------------------------------------------------------------
            # TEXT AND SCRIPT FILES (returned verbatim)
            # ------------------------------------------------------------
            # Plain text, logs, markdown, shell scripts, batch files, Python scripts
            elif ext in ['.txt', '.log', '.md', '.sh', '.bat', '.py']:
                with open(file, 'r', encoding='utf-8', errors='ignore') as f:
                    return f.read()

            # ------------------------------------------------------------
            # WEB
            # ------------------------------------------------------------
            # HTML files
            elif ext in ['.html', '.htm']:
                with open(file, 'r', encoding='utf-8', errors='ignore') as f:
                    soup = BeautifulSoup(f.read(), 'html.parser')
                return soup.get_text('\n').strip()

            # ------------------------------------------------------------
            # EMAIL
            # ------------------------------------------------------------
            # EML files (standard email)
            elif ext == '.eml':
                with open(file, 'r', encoding='utf-8', errors='ignore') as f:
                    msg = email.message_from_file(f)
                text = []
                text.append(f"From: {msg.get('From')}")
                text.append(f"To: {msg.get('To')}")
                text.append(f"Subject: {msg.get('Subject')}")
                text.append(f"Date: {msg.get('Date')}")
                text.append("\n--- Body ---")
                if msg.is_multipart():
                    # Only the plain-text parts are included
                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            payload = part.get_payload(decode=True)
                            if payload:
                                text.append(payload.decode('utf-8', errors='ignore'))
                else:
                    payload = msg.get_payload(decode=True)
                    if payload:
                        text.append(payload.decode('utf-8', errors='ignore'))
                return '\n'.join(text)
            # MSG files (Outlook email)
            elif ext == '.msg':
                msg = extract_msg.Message(file)
                try:
                    text = []
                    text.append(f">>>From: {msg.sender}")
                    text.append(f">>>To: {msg.to}")
                    text.append(f">>>Subject: {msg.subject}")
                    text.append(f">>>Date: {msg.date}")
                    text.append("\nBody ---")
                    # A message without a body must not break the join
                    text.append(msg.body or '')
                finally:
                    msg.close()
                return '\n'.join(text)

            # ------------------------------------------------------------
            # EBOOKS
            # ------------------------------------------------------------
            # EPUB files
            elif ext == '.epub':
                book = epub.read_epub(file)
                text = []
                for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
                    soup = BeautifulSoup(item.get_content(), 'html.parser')
                    text.append(soup.get_text())
                return '\n\n'.join(text)

            # ------------------------------------------------------------
            # UNSUPPORTED FORMAT
            # ------------------------------------------------------------
            else:
                print(f"Unsupported file format: {ext}")
                print("Supported formats: PDF, DOCX, PPTX, ODT, RTF, CSV, TSV, XLSX, SQLite,")
                print(" JSON, YAML, XML, INI/CFG, TXT, LOG, MD, HTML,")
                print(" EML, MSG, EPUB, SH, BAT, PY")
                return None
        except Exception as e:
            print(f"Error reading file {file}: {e}")
            return None

    # ---------------Invalid Arguments---------------
    else:
        print("Error: Invalid arguments for read()")
        print("Use: read() - OCR full screen")
        print(" read(x, y) - OCR from (x,y) to bottom-right")
        print(" read(x, y, width, height) - OCR specific region")
        print(" read(driver) - OCR browser window")
        print(" read('file.pdf') - Read file")
        return None
[docs]
def run(target):
    """
    Runs a file or application on Windows and Linux.

    Args:
        target: File path or application name to execute
            - If target is an existing file: Opens with default application
            - Otherwise it is treated as an application name and launched
            - For applications, the command must be available in system PATH

    Returns:
        None. The launched process is not waited for.

    Raises:
        NotImplementedError: If called on macOS or any other platform that is
            neither Windows nor Linux

    Output:
        - Prints error message if file or application was not found.
        - Prints error message if permission was denied.
        - Prints error message for any other launch failure.

    Example:
        ::

            # Open files with default application
            run("sample.txt")               # Opens in default text editor
            run("document.pdf")             # Opens in default PDF viewer
            run("C:\\Users\\file.xlsx")     # Opens Excel file
            # Launch applications (Windows)
            run("calc")                     # Calculator
            run("notepad")                  # Notepad
            run("mspaint")                  # Paint
            # Launch applications (Linux)
            run("gedit")                    # Text editor
            run("firefox")                  # Browser
            run("gnome-calculator")         # Calculator

    Note:
        - Windows: Uses os.startfile for files, subprocess for applications.
        - Linux: Uses xdg-open for files, direct execution for applications.
          xdg-utils is required (included in Linux dependencies).
    """
    # Check if macOS and reject
    if sys.platform.startswith('darwin') or platform.system() == "Darwin":
        raise NotImplementedError(
            "run() is not supported on macOS.\n"
            "Supported platforms: Windows, Linux\n"
            "Use subprocess.Popen() directly for macOS-specific needs."
        )

    on_windows = sys.platform.startswith('win')
    on_linux = sys.platform.startswith('linux')
    # Reject unknown platforms before the try block so this error reaches the
    # caller like the macOS one, instead of being caught and printed below.
    if not (on_windows or on_linux):
        raise NotImplementedError(
            f"run() is not supported on platform: {sys.platform}\n"
            "Supported platforms: Windows (win32), Linux (linux)"
        )

    try:
        # ------------------------------------------------------------
        # WINDOWS
        # ------------------------------------------------------------
        if on_windows:
            if os.path.isfile(target):
                # File exists - open with default application
                os.startfile(target)
            else:
                # Assume it's an application name
                # Use shell=True to allow PATH resolution
                subprocess.Popen(target, shell=True)
        # ------------------------------------------------------------
        # LINUX
        # ------------------------------------------------------------
        else:
            if os.path.isfile(target):
                # File exists - open with default application using xdg-open
                subprocess.Popen(['xdg-open', os.path.abspath(target)])
            else:
                # Assume it's an application name - try to execute directly
                subprocess.Popen([target])
    except FileNotFoundError:
        print(f"Error: Application or file not found: '{target}'")
        print("For applications, ensure the command is in your system PATH.")
    except PermissionError:
        print(f"Error: Permission denied to run: '{target}'")
        print("Check file permissions or try running with appropriate privileges.")
    except Exception as e:
        print(f"Error running '{target}': {e}")
[docs]
def say(text, volume=1.0):
"""
Speak text using offline Text-to-Speech via Piper TTS.
Args:
text: Text to speak
volume: Volume level 0.0 to 1.0 (default: 1.0)
Returns:
None
Example:
::
say("Hello, how are you?")
say("Download complete.")
say("Error occurred, please try again.", volume=0.7)
say("Warning: Low battery.", volume=0.5)
Note:
- Automatically logs spoken text when log_setup() is active.
- Requires: pip install piper-tts huggingface_hub
- Linux requires: sudo apt install espeak-ng alsa-utils
- Model files are saved in: Windows → %LOCALAPPDATA%\\autocore\\piper_models\\
Linux → ~/.local/share/autocore/piper_models/
- Model size is approximately 60MB, downloaded once and reused.
- Browse all voices at: https://huggingface.co/rhasspy/piper-voices
"""
# ------------------------------------------------------------
# GUARD — needs audio system
# _AUDIO_AVAILABLE is set at file level during library import.
# if audio init failed (headless server, no sound drivers etc.)
# we bail out early instead of crashing deep inside piper
# ------------------------------------------------------------
if not _AUDIO_AVAILABLE:
print("Error: say() requires an audio system.")
return
# ensure caller passed a string — piper will crash on anything else
if not isinstance(text, str):
raise TypeError("Text to speak must be a string")
# piper's SynthesisConfig will silently clamp or error outside this range
if not (0.0 <= volume <= 1.0):
raise ValueError("Volume must be between 0.0 and 1.0")
try:
# ------------------------------------------------------------
# LAZY IMPORTS
# kept here intentionally and NOT moved to the top of the file
# because:
# - piper and huggingface_hub are optional heavy dependencies
# if not installed, the rest of the library still works fine
# - winsound is Windows-only, importing at file level would
# crash on Linux
# ------------------------------------------------------------
from piper.voice import PiperVoice, SynthesisConfig
# ------------------------------------------------------------
# MODEL PATHS
# models are stored in a user-writable location on each platform
# so we never hit permission errors writing into Program Files
# or other system-protected directories
#
# Windows → C:\Users\YourUsername\AppData\Local\autocore\piper_models\
# Linux → /home/yourusername/.local/share/autocore/piper_models/
#
# os.environ["LOCALAPPDATA"] on Windows always points to the
# current user's AppData\Local folder which is always writable
# without admin rights, unlike C:\Program Files\
# ------------------------------------------------------------
model_name = "en_US-libritts_r-medium.onnx"
model_json = "en_US-libritts_r-medium.onnx.json"
if platform.system() == "Windows":
model_dir = Path(os.environ["LOCALAPPDATA"]) / "autocore" / "piper_models"
else:
model_dir = Path.home() / ".local" / "share" / "autocore" / "piper_models"
model_path = model_dir / model_name # full path to the .onnx model
model_json_path = model_dir / model_json # full path to the .json config
# ------------------------------------------------------------
# VALIDATION HELPERS
# defined as inner functions so they can access model_dir
# from the enclosing scope without passing it as an argument
# ------------------------------------------------------------
def _is_valid_onnx(path):
# valid onnx must exist AND be at least 50MB
# we know the model is ~60MB so anything under 50MB
# means the download was interrupted or corrupted
p = Path(path)
return p.exists() and p.stat().st_size > 50 * 1024 * 1024 # 50MB in bytes
def _is_valid_json(path):
# valid json config must exist, be non-empty, and parse
# without errors — a corrupt json would crash piper at
# load time so we catch it here and re-download instead
p = Path(path)
if not p.exists() or p.stat().st_size == 0:
return False
try:
with open(p, 'r', encoding='utf-8') as f:
json.load(f) # json already imported at file level
return True
except Exception:
return False
def _download_file(filename, is_onnx=False):
# downloads one file from HuggingFace into model_dir
# hf_hub_download saves into HuggingFace's own cache first
# (~/.cache/huggingface/) then we move it to piper_models/
# retries up to 3 times for network hiccups or incomplete downloads
# returns the final Path if successful, None if all retries failed
try:
from huggingface_hub import hf_hub_download # lazy — optional dependency
except ImportError:
print("Error: huggingface_hub not installed. Run: pip install huggingface_hub")
return None
max_retries = 3
for attempt in range(1, max_retries + 1):
try:
# suppress HuggingFace's own progress/warning output
# so our print messages stay clean and readable
with warnings.catch_warnings():
warnings.simplefilter("ignore")
cached_path = hf_hub_download(
repo_id="rhasspy/piper-voices",
filename=f"en/en_US/libritts_r/medium/{filename}", # path inside the HuggingFace repo
local_dir=str(model_dir),
)
# hf_hub_download may save into a subdirectory inside model_dir
# move it to model_dir root so our fixed model_path always finds it
final_path = model_dir / filename
if os.path.abspath(cached_path) != os.path.abspath(final_path):
shutil.move(cached_path, final_path) # move from hf cache subdir to model_dir root
# validate the downloaded file before declaring success
# size check for onnx, json parse check for config file
valid = _is_valid_onnx(final_path) if is_onnx else _is_valid_json(final_path)
if valid:
return final_path # download succeeded and file is healthy
# file exists but failed validation — delete it so
# the next attempt starts a completely fresh download
print(f"Attempt {attempt}/{max_retries}: {filename} appears incomplete, retrying...")
if final_path.exists():
final_path.unlink() # delete corrupt/incomplete file
if Path(cached_path).exists():
Path(cached_path).unlink() # also clean up the hf cache copy
except Exception as e:
print(f"Attempt {attempt}/{max_retries} failed: {e}")
final_path = model_dir / filename
if final_path.exists():
final_path.unlink() # clean up any partial download
if attempt < max_retries:
print("Retrying in 3 seconds...")
time.sleep(3) # brief pause before retry to let network recover
print(f"Error: Failed to download {filename} after {max_retries} attempts.")
return None
# ------------------------------------------------------------
# DOWNLOAD MODEL IF MISSING OR INCOMPLETE
# both files are checked independently — if only the json is
# missing or corrupt we skip re-downloading the 60MB onnx and
# just fetch the small config file again
# ------------------------------------------------------------
onnx_valid = _is_valid_onnx(model_path)
json_valid = _is_valid_json(model_json_path)
if not onnx_valid or not json_valid:
print("Piper voice model not found or incomplete. Downloading...")
model_dir.mkdir(parents=True, exist_ok=True) # create piper_models/ if it doesn't exist yet
if not onnx_valid:
if model_path.exists():
print(f"Removing incomplete {model_name}...")
model_path.unlink() # delete the corrupt file before re-downloading
print(f"Downloading {model_name} (~60MB)...")
if _download_file(model_name, is_onnx=True) is None:
return # download failed after all retries, abort
print(f"{model_name} ready.")
if not json_valid:
if model_json_path.exists():
print(f"Removing incomplete {model_json}...")
model_json_path.unlink() # delete the corrupt config before re-downloading
print(f"Downloading {model_json}...")
if _download_file(model_json, is_onnx=False) is None:
return # download failed after all retries, abort
print(f"{model_json} ready.")
print("Piper model ready.")
# ------------------------------------------------------------
# SYNTHESIZE SPEECH
# PiperVoice.load() reads the .onnx model into memory
# SynthesisConfig carries volume into piper so it scales
# the audio during synthesis — no manual audio work needed
# synthesize_wav() runs the neural network and writes raw
# PCM audio into the wav file handle
# ------------------------------------------------------------
voice = PiperVoice.load(str(model_path)) # load model from piper_models/
syn_config = SynthesisConfig(volume=volume) # package volume setting for piper
# delete=False because the file must persist after the 'with'
# block closes so aplay/winsound can open and play it —
# we manually delete it ourselves after playback is done
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
tmp_path = tmp.name # grab the path before the handle closes
with wave.open(tmp_path, "wb") as wav_file:
voice.synthesize_wav(text, wav_file, syn_config=syn_config) # neural network runs here
# ------------------------------------------------------------
# PLAY AUDIO — platform specific
# each OS has its own built-in audio playback tool:
# Linux — aplay (part of alsa-utils, -q means quiet/no output)
# Windows — winsound (Python standard library, Windows only)
# ------------------------------------------------------------
if platform.system() == "Linux":
os.system(f"aplay -q {tmp_path}")
elif platform.system() == "Windows":
import winsound # lazy — Windows only
winsound.PlaySound(tmp_path, winsound.SND_FILENAME)
# temp WAV is deleted immediately after playback —
# only the model files in piper_models/ are kept permanently
if os.path.exists(tmp_path):
os.remove(tmp_path)
except ImportError:
print("Error: piper-tts not installed. Run: pip install piper-tts")
return
except Exception as e:
print(f"Error using piper TTS: {e}")
return
# integrates with log_setup() if active —
# provides an audit trail of everything spoken during a session
print(f"Spoken: {text}")
[docs]
def screenshot(*args):
    """
    Takes a screenshot and saves it to the current working directory.

    Modes:
        1. Full screen, auto-named:                 ()
        2. Full screen, custom name:                (filename)
        3. From (x,y) to screen edge, auto-named:   (x, y)
        4. From (x,y) to screen edge, custom name:  (x, y, filename)
        5. Specific region, auto-named:             (x, y, width, height)
        6. Specific region, custom name:            (x, y, width, height, filename)
        7. Selenium variants of all above:          (driver, ...)

    Args:
        *args: Variable arguments depending on usage
            ::

                - ()                               : Full screen, auto-named
                - (filename)                       : Full screen, custom filename
                - (x, y)                           : From (x,y) to screen edge, auto-named
                - (x, y, filename)                 : From (x,y) to screen edge, custom filename
                - (x, y, width, height)            : Specific region, auto-named
                - (x, y, width, height, filename)  : Specific region, custom filename
                - (driver, ...)                    : Same as above but captures from Selenium browser

            Where:
                - driver: Selenium WebDriver instance (detected by the presence
                  of a ``save_screenshot`` attribute on the first argument)
                - x, y: Top-left corner coordinates of the screenshot region
                - width, height: Dimensions of the screenshot region
                - filename: Custom name to save the screenshot (string or path-like)
                    - .png extension is added automatically if not provided
                    - If not provided, auto-generates: screenshot_YYYY-MM-DD_HH-MM-SS_<unix>.png
                      Example: screenshot_2025-02-18_14-30-45_1708268445.png

    Returns:
        True if successful, False otherwise (errors are printed, never raised)

    Output:
        - Prints the full path of the saved screenshot on success.
        - Prints error message if invalid arguments or coordinates are provided.
        - Prints error message if the driver reports that the file could not be written.

    Example:
        ::

            # Full screen (PyAutoGUI)
            screenshot()                                   # Full screen, auto-named
            screenshot('desktop.png')                      # Full screen, custom name

            # Selenium full browser window
            screenshot(driver)                             # Selenium, auto-named
            screenshot(driver, 'webpage.png')              # Selenium, custom name

            # From top-left point to edge (PyAutoGUI)
            screenshot(100, 200)                           # From (100,200) to edge, auto-named
            screenshot(100, 200, 'crop.png')               # From (100,200) to edge, custom name

            # From top-left point to edge (Selenium)
            screenshot(driver, 100, 200)                   # Selenium from (100,200) to edge
            screenshot(driver, 100, 200, 'page.png')       # Selenium, custom name

            # Specific region (PyAutoGUI)
            screenshot(0, 0, 500, 300)                     # Region: top-left (0,0), 500x300, auto-named
            screenshot(0, 0, 500, 300, 'region.png')       # Region: top-left (0,0), 500x300, custom name

            # Specific region (Selenium)
            screenshot(driver, 0, 0, 800, 600)             # Selenium region, auto-named
            screenshot(driver, 0, 0, 800, 600, 'sel.png')  # Selenium region, custom name
    """
    try:
        # ------------------------------------------------------------
        # DETERMINE MODE : Selenium or PyAutoGUI
        # ------------------------------------------------------------
        if len(args) > 0 and hasattr(args[0], 'save_screenshot'):
            # First argument is a WebDriver object
            use_driver = True
            driver_obj = args[0]
            remaining_args = args[1:]
        else:
            # PyAutoGUI mode : guard display check here
            if not _GUI_AVAILABLE:
                print("Error: screenshot() requires a display.")
                return False
            use_driver = False
            driver_obj = None
            remaining_args = args

        # ------------------------------------------------------------
        # PARSE ARGUMENTS
        # start from the "full screen, auto-named" defaults and let each
        # arity override only the values it actually supplies
        # ------------------------------------------------------------
        x, y, width, height, filename = 0, 0, None, None, None
        arg_count = len(remaining_args)
        if arg_count == 1:
            # screenshot('file.png') or screenshot(driver, 'file.png')
            filename = remaining_args[0]
        elif arg_count == 2:
            # screenshot(100, 200) or screenshot(driver, 100, 200)
            x, y = remaining_args
        elif arg_count == 3:
            # screenshot(100, 200, 'file.png') or screenshot(driver, 100, 200, 'file.png')
            x, y, filename = remaining_args
        elif arg_count == 4:
            # screenshot(0, 0, 500, 300) or screenshot(driver, 0, 0, 500, 300)
            x, y, width, height = remaining_args
        elif arg_count == 5:
            # screenshot(0, 0, 500, 300, 'file.png') or screenshot(driver, 0, 0, 500, 300, 'file.png')
            x, y, width, height, filename = remaining_args
        elif arg_count > 5:
            raise ValueError("Too many arguments")

        # Reject e.g. screenshot(100) early: a lone number lands in the
        # filename slot and would otherwise fail later with an opaque TypeError
        if filename is not None:
            if not isinstance(filename, (str, os.PathLike)):
                raise ValueError("Filename must be a string or path-like object")
            filename = os.fspath(filename)

        # Convert coordinates to integers (an explicit None means 0)
        x = int(x) if x is not None else 0
        y = int(y) if y is not None else 0

        # ------------------------------------------------------------
        # GET SCREEN DIMENSIONS
        # ------------------------------------------------------------
        if use_driver:
            screen_width = driver_obj.execute_script("return window.innerWidth")
            screen_height = driver_obj.execute_script("return window.innerHeight")
        else:
            screen_width, screen_height = pyautogui.size()

        # Calculate width and height if not provided
        if width is None:
            width = screen_width - x    # From x to right edge
        if height is None:
            height = screen_height - y  # From y to bottom edge
        width, height = int(width), int(height)

        # Validate
        if width <= 0 or height <= 0:
            raise ValueError("Width and height must be positive")
        if x < 0 or y < 0:
            raise ValueError("Coordinates must be non-negative")

        # ------------------------------------------------------------
        # GENERATE FILENAME
        # ------------------------------------------------------------
        if filename is None:
            now = datetime.datetime.now()
            timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
            unix_time = int(now.timestamp())
            filename = f"screenshot_{timestamp}_{unix_time}.png"
        elif not os.path.splitext(filename)[1]:
            # Add .png extension if missing
            filename += '.png'

        # Get full path (an absolute filename is kept as-is by os.path.join)
        full_path = os.path.join(os.getcwd(), filename)

        # ------------------------------------------------------------
        # TAKE SCREENSHOT
        # ------------------------------------------------------------
        if use_driver:
            if x == 0 and y == 0 and width == screen_width and height == screen_height:
                # Full browser window, no cropping needed.
                # Selenium signals a write failure by returning False instead
                # of raising, so the result must be checked explicitly;
                # "is False" keeps duck-typed drivers that return None working
                if driver_obj.save_screenshot(full_path) is False:
                    raise RuntimeError(f"Could not save screenshot to {full_path}")
            else:
                # Region crop requested : crop the in-memory PNG, clamping the
                # bottom-right corner to the captured image size
                png_bytes = driver_obj.get_screenshot_as_png()
                image = Image.open(io.BytesIO(png_bytes))
                img_width, img_height = image.size
                crop_x2 = min(x + width, img_width)
                crop_y2 = min(y + height, img_height)
                cropped = image.crop((x, y, crop_x2, crop_y2))
                cropped.save(full_path)
        else:
            # PyAutoGUI: capture region directly
            img = pyautogui.screenshot(region=(x, y, width, height))
            img.save(full_path)

        print(f"Screenshot saved: {full_path}")
        return True
    except ValueError as e:
        print(f"Invalid input: {e}")
        return False
    except RuntimeError as e:
        print(f"Error: {e}")
        return False
    except Exception as e:
        print(f"Error taking screenshot: {e}")
        return False
[docs]
def second():
    """
    Return the seconds component of the current local time.

    Returns:
        int: Seconds field of the local clock (normally 0-59)

    Example:
        second()  # 45
    """
    local_now = time.localtime()
    return local_now.tm_sec
[docs]
def wait(*args, countdown=True):
    """
    Wait with countdown, wait for element or wait for color at pixel.

    Args:
        *args: Variable arguments (see examples)
        countdown: If True, shows countdown display (default: True)

    Returns:
        True if successful, False if timeout or no display is available
        (color mode)

    Raises:
        ValueError: If seconds is negative, the selector type is unknown or
            the arguments match none of the supported modes
        RuntimeError: If reading the pixel color fails (color mode)

    Example:
        ::

            # Countdown wait
            wait(5)                                   # Wait 5 seconds with countdown
            wait(10, countdown=False)                 # Wait 10 seconds silently
            wait(2.5)                                 # Fractional waits are honored too
            wait()                                    # Wait 3 seconds (default)

            # Wait for element (Selenium) - pass driver object
            wait(driver, 'xpath', '//button')         # Wait max 180s with countdown
            wait(driver, 'id', 'submit-btn', 10)      # Wait max 10s with countdown
            wait(driver, 'class', 'content', 30, countdown=False)  # Wait silently for 30s

            # Wait for color at pixel
            wait(100, 200, 255, 0, 0)                 # Wait for red (255,0,0) at (100,200) with countdown
            wait(100, 200, 255, 0, 0, 30)             # Wait for red, max 30s with countdown
            wait(500, 300, 0, 255, 0, 60, countdown=False)  # Wait silently
    """
    # if no argument is passed
    if len(args) == 0:
        args = (3,)  # Set default to 3 seconds

    # ------------------------------------------------------------
    # MODE 1 : COUNTDOWN (1 argument, integer or float)
    # needs: nothing : safe on all platforms
    # ------------------------------------------------------------
    if len(args) == 1 and isinstance(args[0], (int, float)):
        seconds = args[0]
        if seconds < 0:
            raise ValueError("Seconds must be non-negative")
        if seconds == 0:
            return True  # No wait needed
        if countdown:
            # The display ticks once per whole second
            whole_seconds = int(seconds)
            print('Waiting:')
            for remaining in range(whole_seconds, 0, -1):
                print(f'..{remaining} ', end='\r', flush=True)
                time.sleep(1)
            # Sleep the fractional remainder as well, so wait(0.5) and
            # wait(2.5) last as long with the countdown as without it
            leftover = seconds - whole_seconds
            if leftover > 0:
                time.sleep(leftover)
            print('Done ')
        else:
            time.sleep(seconds)
        return True

    # ------------------------------------------------------------
    # MODE 2 : WAIT FOR ELEMENT (driver object + 2 or 3 arguments)
    # needs: nothing : works on all platforms
    # ------------------------------------------------------------
    elif len(args) >= 3 and hasattr(args[0], 'find_element'):
        # Selenium mode - driver object detected
        driver_obj = args[0]
        selector_type = args[1]
        selector = args[2]
        # Only an exact 4-argument call overrides the timeout
        timeout = args[3] if len(args) == 4 else 180  # Default 180s (3 minutes)

        # Validate selector type
        if selector_type not in ['id', 'xpath', 'class', 'name', 'css', 'tag', 'text', 'partial']:
            raise ValueError(f"Invalid selector type: {selector_type}")

        if countdown:
            print(f'Waiting for element ({selector_type}: {selector}):')

        # Save original implicit wait and set to 0 for fast checking
        original_implicit_wait = driver_obj.timeouts.implicit_wait
        driver_obj.implicitly_wait(0)
        start_time = time.time()
        try:
            # Countdown approach: check element every second, print countdown
            for remaining in range(timeout, 0, -1):
                element = _get_web_element(driver_obj, selector_type, selector)
                if element:
                    elapsed = time.time() - start_time
                    print(f"Element found after {elapsed:.1f}s")
                    return True
                if countdown:
                    print(f'..{remaining} ', end='\r', flush=True)
                # Wait 1 second before next check
                time.sleep(1)
        finally:
            # Restore the caller's implicit wait on every exit path,
            # including an exception raised by the element lookup
            driver_obj.implicitly_wait(original_implicit_wait)

        # Timeout reached
        print(f"Element not found after {timeout}s timeout.")
        return False

    # ------------------------------------------------------------------
    # MODE 3 : WAIT FOR COLOR AT PIXEL (5 or 6 arguments, all integers)
    # needs: display
    # ------------------------------------------------------------------
    elif len(args) in [5, 6] and all(isinstance(arg, int) for arg in args):
        if not _GUI_AVAILABLE:
            print("Error: wait() color mode requires a display.")
            return False
        x, y, target_r, target_g, target_b = args[:5]
        timeout = args[5] if len(args) == 6 else 180  # Default 180s

        if countdown:
            print(f'Waiting at ({x},{y}) for color RGB({target_r},{target_g},{target_b}) :')

        start_time = time.time()
        # Countdown approach: check color every second, print countdown
        for remaining in range(timeout, 0, -1):
            try:
                color_matches = pyautogui.pixelMatchesColor(x, y, (target_r, target_g, target_b))
            except Exception as e:
                # Chain the cause so the underlying failure stays visible
                raise RuntimeError(f"Error checking pixel color: {e}") from e
            if color_matches:
                elapsed = time.time() - start_time
                print(f"Color found after {elapsed:.1f}s")
                return True
            if countdown:
                print(f'..{remaining} ', end='\r', flush=True)
            # Wait 1 second before next check
            time.sleep(1)

        # Timeout reached
        print(f"Color not found after {timeout}s timeout.")
        return False

    # ---------------Invalid Arguments---------------
    else:
        raise ValueError("Invalid arguments for wait()")
[docs]
def wait_download(timeout=1200, url=None, filename=None, download_dir=None):
    """
    Wait for a browser-initiated download to complete or download a file directly via URL.

    Modes:
        1. URL mode (url provided): Downloads file directly using requests. Useful for
           file types blocked by browsers (.exe, .msix, .msi, etc.). File is saved to
           Python's current working directory.
        2. Monitor mode (url not provided): Monitors the downloads folder for a
           browser-initiated download to complete.

    Args:
        timeout: Maximum seconds to wait for download completion (default: 1200)
        url: Direct download URL (optional)::

            - If provided: Downloads file directly via requests
            - If None: Monitors downloads folder for browser-initiated download

        filename: Custom filename to save/rename the downloaded file (optional)::

            - With extension (e.g. "myapp.exe") : used as-is
            - Without extension (e.g. "myapp")  : extension borrowed from original file
            - If None: Original filename is kept
            - If multiple files are downloaded, only the first completed file is renamed

        download_dir: Custom download directory to monitor (monitor mode only, optional)::

            - If provided: Uses specified path and skips all auto-detection
            - If None: Auto-detects using the priority order described in Note below

    Returns::

        str:   Full path of the downloaded file (always includes extension) on success
        False: On failure (download error, timeout, directory access issue, etc.)

    Output:
        - Prints download progress every 10 seconds showing elapsed time and file size.
        - Prints confirmation with final filename and saved path on completion.
        - Prints timeout message if download does not complete in time.

    Example:
        ::

            wait_download()                                                    # Monitor downloads folder
            wait_download(url='https://abc.com/file.msix')                     # Direct download via URL
            wait_download(url='https://abc.com/file.msix', filename='myapp')   # Custom name, borrows extension
            wait_download(300, filename='our_log.txt')                         # Monitor and rename on completion
            wait_download(600, download_dir='/downloads')                      # Docker with custom path
            wait_download(300, download_dir='D:/MyDownloads')                  # Windows custom path

            # Use with browser() : pass driver.download_dir to guarantee alignment
            driver = browser('https://example.com')
            click(driver, 'id', 'download-button')
            wait_download(download_dir=driver.download_dir)

    Note:
        - When download_dir is not provided, the folder is auto-detected in this order:
            1. DOWNLOAD_DIR environment variable (if set at OS level)
            2. /downloads folder (if running inside Docker)
            3. ~/Downloads (default fallback)
        - If a file was modified within the last 20 seconds before calling this function,
          it will be detected as a recently completed download and returned immediately.
          This handles cases where downloads complete very quickly before monitoring starts.
        - In URL mode a download that fails or times out part-way does not leave a
          truncated file behind: the partial file is removed before False is returned.
    """

    def _resolve_final_filename(custom_name, original_name):
        """Resolve the final filename, borrowing extension from original if custom name has none."""
        if custom_name is None:
            return original_name
        original_ext = os.path.splitext(original_name)[1]
        custom_base, custom_ext = os.path.splitext(custom_name)
        return custom_name if custom_ext else custom_base + original_ext

    def _rename_if_needed(directory, original_name, final_name):
        """Rename the downloaded file if a custom filename was provided."""
        if final_name != original_name:
            os.rename(
                os.path.join(directory, original_name),
                os.path.join(directory, final_name)
            )
            print(f'"{original_name}" renamed to "{final_name}"')

    def _format_size(bytes_count):
        """Convert bytes to a human-readable MB string."""
        return f"{bytes_count / (1024 * 1024):.1f} MB"

    def _discard_partial(path):
        """Best-effort removal of a truncated URL download (no-op when path is None)."""
        if path is None:
            return
        try:
            os.remove(path)
        except OSError:
            # Deliberately ignored: the failure being reported is the download
            # itself, and a cleanup error must not mask it
            pass

    # ------------------------------------------------------------
    # MODE 1 : Direct download via requests
    # ------------------------------------------------------------
    if url is not None:
        partial_path = None  # set once the output file has been opened for writing
        try:
            original_filename = url.split('/')[-1].split('?')[0]
            final_filename = _resolve_final_filename(filename, original_filename)
            print(f"Downloading: {final_filename}")
            timed_out = False
            # A streamed response holds its connection until it is fully read
            # or closed, so the 'with' block guarantees release on every path
            with requests.get(url, stream=True, timeout=30) as response:  # 30s for connection/chunk timeout
                response.raise_for_status()
                deadline = time.time() + timeout  # enforce total download time limit
                download_start = time.time()
                last_print_time = 0
                total_bytes = 0
                with open(final_filename, 'wb') as f:
                    partial_path = final_filename
                    for chunk in response.iter_content(chunk_size=8192):
                        if time.time() > deadline:
                            timed_out = True
                            break
                        f.write(chunk)
                        total_bytes += len(chunk)
                        elapsed_sec = int(time.time() - download_start)
                        # Print progress every 10 seconds
                        if elapsed_sec - last_print_time >= 10:
                            elapsed = str(datetime.timedelta(seconds=elapsed_sec))
                            print(f"Downloading... {elapsed} elapsed ({_format_size(total_bytes)})")
                            last_print_time = elapsed_sec
            if timed_out:
                # Both handles are closed here, so the truncated file can be
                # removed safely (required on Windows)
                _discard_partial(partial_path)
                print(f"Timeout after {timeout} seconds while downloading '{final_filename}'")
                return False
            partial_path = None  # download is complete : nothing to clean up anymore
            saved_path = os.path.abspath(final_filename)
            print(f"Downloaded file saved at: {saved_path}")
            return saved_path
        except Exception as e:
            _discard_partial(partial_path)
            print(f"Could not download file: {e}")
            return False

    # ------------------------------------------------------------
    # MODE 2 : Monitor browser downloads folder
    # ------------------------------------------------------------
    # Determine download directory using the following priority order:
    # 1. User provided path via download_dir argument
    # 2. DOWNLOAD_DIR environment variable (if set)
    # 3. /downloads folder (if running inside Docker)
    # 4. ~/Downloads folder (default, if none of the above apply)
    if download_dir is not None:
        download_dir = str(download_dir)
    elif os.getenv('DOWNLOAD_DIR'):
        download_dir = os.getenv('DOWNLOAD_DIR')
        print(f'Using DOWNLOAD_DIR from environment: {download_dir}')
    elif os.path.exists('/.dockerenv'):
        download_dir = '/downloads'
        print(f'Docker detected, using: {download_dir}')
    else:
        download_dir = str(Path.home() / "Downloads")

    # Ensure the download directory exists
    if not os.path.exists(download_dir):
        print(f"Warning: Download directory does not exist: {download_dir}")
        print(f"Attempting to create it...")
        try:
            os.makedirs(download_dir, exist_ok=True)
            print(f"Created directory: {download_dir}")
        except Exception as e:
            print(f"Error: Could not create directory: {e}")
            return False

    print(f'Monitoring downloads in: {download_dir}')

    # Temporary file extensions used by different browsers while downloading
    temp_extensions = ('.crdownload', '.part', '.download', '.tmp', '.temp')

    # Snapshot the initial state of the downloads folder before monitoring begins
    try:
        initial_files = set(os.listdir(download_dir))
        initial_temp_files = {f for f in initial_files if f.endswith(temp_extensions)}
        # Record sizes of any temp files that already exist, to detect if they are actively downloading
        initial_temp_sizes = {}
        for f in initial_temp_files:
            try:
                initial_temp_sizes[f] = os.path.getsize(os.path.join(download_dir, f))
            except Exception as e:
                print(f"Could not get file size of {f}: {e}")
    except Exception as e:
        print(f"Error accessing download directory: {e}")
        return False

    # Wait 10 seconds before first check to allow download to start
    print("Checking for active downloads...")
    time.sleep(10)

    # Check if a file was already downloaded quickly (modified within last 20 seconds)
    try:
        current_files_after_wait = set(os.listdir(download_dir))
        current_time = time.time()
        recent_files = []
        for f in current_files_after_wait:
            if not f.endswith(temp_extensions):
                try:
                    mtime = os.path.getmtime(os.path.join(download_dir, f))
                    age = current_time - mtime
                    if age <= 20:
                        recent_files.append((f, age))
                except Exception as e:
                    print(f"Could not get modification time of {f}: {e}")
        if recent_files:
            recent_files.sort(key=lambda x: x[1])  # sort by age, most recent first
            most_recent_file, age = recent_files[0]
            final_filename = _resolve_final_filename(filename, most_recent_file)
            _rename_if_needed(download_dir, most_recent_file, final_filename)
            print(f"Quick download detected: '{final_filename}' (modified {int(age)} seconds ago)")
            print(f"Downloaded file saved at: {os.path.join(download_dir, final_filename)}")
            return os.path.join(download_dir, final_filename)
    except Exception as e:
        print(f"Error checking for recent files: {e}")

    download_time = 10           # already waited 10 seconds above
    last_print_time = 0
    download_started = False
    monitoring_files = set()
    completed_files_so_far = []  # tracks files already reported as completed mid-session
    filename_used = False        # ensures filename rename only applies to first completed file

    def _record_completed(original_name, elapsed_seconds):
        """Apply the one-time rename, announce the completion and remember the file."""
        nonlocal filename_used
        if not filename_used:
            final_name = _resolve_final_filename(filename, original_name)
            _rename_if_needed(download_dir, original_name, final_name)
            filename_used = True
        else:
            final_name = original_name
            if filename is not None:
                print(f'"{original_name}" kept original name (filename only applies to first file)')
        elapsed = str(datetime.timedelta(seconds=elapsed_seconds))
        print(f"Download completed: '{final_name}' (took {elapsed})")
        completed_files_so_far.append(final_name)

    while download_time < timeout:
        try:
            current_files = set(os.listdir(download_dir))
            current_temp_files = {f for f in current_files if f.endswith(temp_extensions)}

            # Get current sizes of all active temp files
            current_temp_sizes = {}
            for f in current_temp_files:
                try:
                    current_temp_sizes[f] = os.path.getsize(os.path.join(download_dir, f))
                except Exception as e:
                    print(f"Could not get file size of {f}: {e}")

            new_temp_files = current_temp_files - initial_temp_files

            # On the first pass (10s mark), check if any pre-existing temp files are actively growing
            if download_time == 10:
                active_old_temp_files = set()
                for f in initial_temp_files:
                    if f in current_temp_sizes and f in initial_temp_sizes:
                        if current_temp_sizes[f] != initial_temp_sizes[f]:
                            active_old_temp_files.add(f)
                            monitoring_files.add(f)
                if active_old_temp_files:
                    download_started = True
                    if len(active_old_temp_files) == 1:
                        print(f"Active download detected: '{list(active_old_temp_files)[0]}'")
                    else:
                        print(
                            f"{len(active_old_temp_files)} active downloads detected: {', '.join(active_old_temp_files)}")

            # Track any brand new temp files that appeared after monitoring started
            if new_temp_files:
                truly_new = new_temp_files - monitoring_files
                if truly_new:
                    download_started = True
                    monitoring_files.update(truly_new)
                    if len(truly_new) == 1:
                        print(f"New download started: '{list(truly_new)[0]}'")
                    else:
                        print(f"{len(truly_new)} new downloads started: {', '.join(truly_new)}")

            # Check for any newly completed files : catches mid-session completions including
            # small files that complete too quickly for temp file tracking to catch
            # NOTE(review): a browser that creates a placeholder under the final name while
            # its temp file is still being written would be reported here early : confirm
            newly_completed = [
                f for f in (current_files - initial_files)
                if not f.endswith(temp_extensions)
                and f not in completed_files_so_far
            ]
            for f in newly_completed:
                _record_completed(f, download_time)

            all_monitoring = (current_temp_files & monitoring_files) | new_temp_files

            # Return immediately if a file just completed and no temp files are still active
            if newly_completed and not (current_temp_files & monitoring_files):
                primary = completed_files_so_far[0]
                print(f"Downloaded file saved at: {os.path.join(download_dir, primary)}")
                return os.path.join(download_dir, primary)

            if all_monitoring:
                # Still downloading : print progress every 10 seconds with size of each file
                if download_time - last_print_time >= 10:
                    elapsed = str(datetime.timedelta(seconds=download_time))
                    if len(all_monitoring) == 1:
                        f = list(all_monitoring)[0]
                        size_str = _format_size(current_temp_sizes.get(f, 0))
                        print(f"Downloading... {elapsed} elapsed ({size_str})")
                    else:
                        size_parts = ', '.join(
                            f"{f}: {_format_size(current_temp_sizes.get(f, 0))}"
                            for f in all_monitoring
                        )
                        print(f"Downloading... {elapsed} elapsed ({size_parts})")
                    last_print_time = download_time
            elif download_started:
                # All tracked temp files are gone. Anything that finished in this pass was
                # already handled above, so the only question left is whether an EARLIER
                # pass recorded a completed file while a temp file was still active. If so
                # the session is over : return it instead of idling until the timeout
                if completed_files_so_far:
                    primary = completed_files_so_far[0]
                    print(f"Downloaded file saved at: {os.path.join(download_dir, primary)}")
                    return os.path.join(download_dir, primary)
                # Temp files gone but no completed file found yet : keep waiting briefly
                if download_time - last_print_time >= 10:
                    elapsed = str(datetime.timedelta(seconds=download_time))
                    print(f"Verifying download... (elapsed: {elapsed})")
                    last_print_time = download_time
            else:
                # No download detected yet : no size to report, just print elapsed time
                if download_time - last_print_time >= 10:
                    recent_complete_files = [
                        f for f in current_files
                        if not f.endswith(temp_extensions) and f not in initial_files
                    ]
                    if recent_complete_files:
                        original_name = recent_complete_files[0]
                        final_filename = _resolve_final_filename(filename, original_name)
                        _rename_if_needed(download_dir, original_name, final_filename)
                        print(f"Quick download detected: '{final_filename}'")
                        print(f"Downloaded file saved at: {os.path.join(download_dir, final_filename)}")
                        return os.path.join(download_dir, final_filename)
                    elapsed = str(datetime.timedelta(seconds=download_time))
                    print(f"Waiting for download to start... {elapsed} elapsed")
                    last_print_time = download_time
        except Exception as e:
            print(f'Error monitoring downloads: {e}')
            return False

        time.sleep(1)
        download_time += 1

    # Report any files that completed mid-session before the timeout
    if completed_files_so_far:
        if len(completed_files_so_far) == 1:
            print(f"Note: 1 file completed during session: '{list(completed_files_so_far)[0]}'")
        else:
            print(f"Note: {len(completed_files_so_far)} files completed during session:")
            for f in completed_files_so_far:
                print(f" - {f}")

    # Timeout reached : report what was still in progress if anything
    if monitoring_files or download_started:
        try:
            current_files = set(os.listdir(download_dir))
        except OSError:
            # Directory vanished or became unreadable : still honor the
            # "return False on failure" contract instead of raising here
            current_files = set()
        current_temp_files = {f for f in current_files if f.endswith(temp_extensions)}
        still_downloading = current_temp_files & monitoring_files
        if still_downloading:
            if len(still_downloading) == 1:
                print(f"Timeout after {timeout} seconds while waiting for '{list(still_downloading)[0]}' to complete.")
            else:
                print(
                    f"Timeout after {timeout} seconds while waiting for {len(still_downloading)} files: {', '.join(still_downloading)}")
        else:
            print(f'Timeout after {timeout} seconds. Download status unclear.')
    else:
        print(f'Timeout after {timeout} seconds. No download detected.')
    return False
[docs]
def _window_first_match(target, titles):
    """Return the first title containing ``target`` (case-insensitive), or None."""
    needle = target.lower()
    return next((title for title in titles if needle in title.lower()), None)


def _window_find_hwnd(title):
    """Return the handle of the first visible window titled exactly ``title``, or None (Windows only)."""
    handles = []

    def _collect(hwnd, _ctx):
        if win32gui.IsWindowVisible(hwnd) and win32gui.GetWindowText(hwnd) == title:
            handles.append(hwnd)

    win32gui.EnumWindows(_collect, None)
    return handles[0] if handles else None


def _window_linux_id(title):
    """Return the id column of the first ``wmctrl -l`` line containing ``title``, or None (Linux only)."""
    listing = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True)
    if listing.returncode != 0:
        return None
    for line in listing.stdout.strip().split('\n'):
        if title in line:
            return line.split()[0]
    return None


def _window_list_titles(system):
    """Return the titles of all windows for ``system`` ('Windows' or 'Linux'); None for other systems."""
    if system == "Windows":
        titles = []

        def _collect(hwnd, _ctx):
            if win32gui.IsWindowVisible(hwnd):
                title = win32gui.GetWindowText(hwnd)
                if title.strip():
                    titles.append(title)

        win32gui.EnumWindows(_collect, None)
        return titles
    if system == "Linux":
        listing = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True)
        if listing.returncode != 0:
            print("Error: wmctrl not installed.")
            print("Install with: sudo apt-get install wmctrl")
            return []
        titles = []
        for line in listing.stdout.strip().split('\n'):
            # Title is everything after the third whitespace-separated column
            parts = line.split(None, 3)
            if len(parts) >= 4:
                titles.append(parts[3])
        return titles
    return None


def _window_active_title(system):
    """Return the title of the active window for ``system``; None when it cannot be determined."""
    if system == "Windows":
        title = win32gui.GetWindowText(win32gui.GetForegroundWindow()).strip()
        return title if title else None
    if system == "Linux":
        result = subprocess.run(['xdotool', 'getactivewindow', 'getwindowname'],
                                capture_output=True, text=True)
        if result.returncode == 0:
            return result.stdout.strip()
        print("Error: xdotool not installed.")
        print("Install with: sudo apt-get install xdotool")
        return None
    return None


def _window_parse_geometry(action, args):
    """Validate and return the two integer arguments of a 'resize' or 'move' action."""
    if action == 'resize':
        if len(args) < 2:
            raise ValueError("Action 'resize' requires width and height: window('resize', 'Chrome', 800, 600)")
        try:
            width, height = int(args[0]), int(args[1])
        except (ValueError, TypeError):
            raise ValueError("Width and height must be integers") from None
        if width <= 0 or height <= 0:
            raise ValueError("Width and height must be positive")
        return width, height
    if len(args) < 2:
        raise ValueError("Action 'move' requires x and y coordinates: window('move', 'Chrome', 100, 200)")
    try:
        x, y = int(args[0]), int(args[1])
    except (ValueError, TypeError):
        raise ValueError("X and Y coordinates must be integers") from None
    if x < 0 or y < 0:
        raise ValueError("X and Y coordinates must be non-negative")
    return x, y


def window(action=None, target=None, *args):
    """
    Unified window management function for Windows and Linux.

    Args:
        action: Window operation to perform (default: 'list')::

            'list'     : Get all window titles
            'title'    : Get active window title (or find full title if target provided)
            'focus'    : Bring window to foreground
            'close'    : Close window
            'minimize' : Minimize window
            'maximize' : Maximize window
            'resize'   : Resize window (requires width, height)
            'move'     : Move window (requires x, y)

        target: Window title or pattern (required for most actions)
        *args: Additional parameters (width, height for resize; x, y for move)

    Returns:
        Return type depends on action::

            list/None : List of strings when action is None or 'list'
            title     : String or None
            others    : True if successful, False otherwise

    Raises:
        ValueError: If invalid action, missing required parameters or invalid
            dimensions/coordinates. These checks run before any window
            operation, so the error always reaches the caller.
        NotImplementedError: If called on macOS

    Output:
        - Prints error if window not found, with suggestions for similar window titles
          (focus action only).
        - Prints error if wmctrl or xdotool is not installed (Linux only).

    Example:
        ::

            # Get all windows (default)
            window()                                # ['Chrome', 'Notepad', 'Excel']
            window('list')                          # ['Chrome', 'Notepad', 'Excel']
            # Check if window exists
            if 'Chrome' in window():
                print("Chrome is open!")
            # Get active window title
            window('title')                         # 'Google Chrome - New Tab'
            # Find full title containing text
            window('title', 'Chrome')               # 'Google Chrome - New Tab'
            window('title', 'Note')                 # 'Untitled - Notepad'
            # Window operations
            window('focus', 'Chrome')               # Focus window
            window('close', 'Notepad')              # Close window
            window('minimize', 'Excel')             # Minimize window
            window('maximize', 'Word')              # Maximize window
            # Position and size
            window('resize', 'Chrome', 800, 600)    # Resize to 800x600
            window('move', 'Chrome', 100, 200)      # Move to (100, 200)
            # Side-by-side setup (1920x1080 display)
            window('move', 'Chrome', 0, 0)          # Left side
            window('resize', 'Chrome', 960, 1080)   # Half screen width
            window('move', 'Code', 960, 0)          # Right side
            window('resize', 'Code', 960, 1080)     # Half screen width
            # Recording setup
            window('resize', 'Demo', 1280, 720)     # 720p size
            window('move', 'Demo', 320, 180)        # Centered on 1920x1080

    Note:
        - On Linux, resize and move automatically remove maximized/minimized
          state before applying changes, ensuring consistent behavior.
        - Target matching is case-insensitive and partial (e.g. 'Chrome' matches
          'Google Chrome - New Tab').
    """
    system = platform.system()
    # Check if macOS and reject
    if system == "Darwin":
        raise NotImplementedError(
            "window() is not supported on macOS.\n"
            "Supported platforms: Windows, Linux\n"
            "Note: Selenium functions will still work on macOS."
        )
    # Default action: list all windows
    if action is None:
        action = 'list'
    valid_actions = ['list', 'title', 'focus', 'close', 'minimize', 'maximize', 'resize', 'move']
    if not isinstance(action, str):
        raise ValueError(f"Invalid action '{action}'. Valid actions: {', '.join(valid_actions)}")
    action = action.lower()
    if action not in valid_actions:
        raise ValueError(f"Invalid action '{action}'. Valid actions: {', '.join(valid_actions)}")
    # Every action except 'list' and 'title' operates on a named window
    targeted_actions = ['focus', 'close', 'minimize', 'maximize', 'resize', 'move']
    if action in targeted_actions and target is None:
        raise ValueError(f"Action '{action}' requires a target window")
    # Validate geometry outside the try block below: its broad handler would
    # otherwise swallow the documented ValueError and return False instead.
    first, second = _window_parse_geometry(action, args) if action in ('resize', 'move') else (None, None)

    try:
        if action == 'list':
            return _window_list_titles(system)

        if action == 'title':
            if target is None:
                return _window_active_title(system)
            return _window_first_match(target, window('list') or [])

        # maximize/resize/move bring the window to the foreground first
        if action in ('maximize', 'resize', 'move'):
            if not window('focus', target):
                return False
            time.sleep(0.5)

        all_windows = window('list') or []
        window_title = _window_first_match(target, all_windows)
        if window_title is None:
            print(f"Window not found: {target}")
            if action == 'focus':
                # Suggest similar windows
                suggestions = get_close_matches(target, all_windows, n=3, cutoff=0.6)
                if suggestions:
                    print(f"Did you mean: {', '.join(suggestions)}")
            return False

        if system == "Windows":
            hwnd = _window_find_hwnd(window_title)
            if not hwnd:
                return False
            if action == 'focus':
                win32gui.ShowWindow(hwnd, win32con.SW_RESTORE)
                win32gui.SetForegroundWindow(hwnd)
            elif action == 'close':
                win32gui.PostMessage(hwnd, win32con.WM_CLOSE, 0, 0)
            elif action == 'minimize':
                win32gui.ShowWindow(hwnd, win32con.SW_MINIMIZE)
            elif action == 'maximize':
                win32gui.ShowWindow(hwnd, win32con.SW_MAXIMIZE)
            elif action == 'resize':
                # Keep the current top-left corner, change only the size
                rect = win32gui.GetWindowRect(hwnd)
                win32gui.MoveWindow(hwnd, rect[0], rect[1], first, second, True)
            else:  # move
                # Keep the current size, change only the position
                rect = win32gui.GetWindowRect(hwnd)
                win32gui.MoveWindow(hwnd, first, second, rect[2] - rect[0], rect[3] - rect[1], True)
            return True

        if system == "Linux":
            if action == 'focus':
                result = subprocess.run(['wmctrl', '-a', window_title],
                                        capture_output=True, text=True)
                return result.returncode == 0
            if action == 'close':
                result = subprocess.run(['wmctrl', '-c', window_title],
                                        capture_output=True, text=True)
                return result.returncode == 0
            if action == 'maximize':
                result = subprocess.run(['wmctrl', '-r', window_title, '-b',
                                         'add,maximized_vert,maximized_horz'],
                                        capture_output=True, text=True)
                return result.returncode == 0

            # minimize / resize / move address the window by its wmctrl id
            window_id = _window_linux_id(window_title)
            if window_id is None:
                return False
            if action == 'minimize':
                subprocess.run(['xdotool', 'windowminimize', window_id])
                return True
            # Remove maximized/minimized state before resizing or moving
            subprocess.run(
                ['wmctrl', '-i', '-r', window_id, '-b', 'remove,maximized_vert,maximized_horz'])
            subprocess.run(['xdotool', 'windowmap', window_id])  # restore if minimized
            time.sleep(0.3)
            if action == 'resize':
                geometry = f'0,-1,-1,{first},{second}'
            else:
                geometry = f'0,{first},{second},-1,-1'
            subprocess.run(['wmctrl', '-i', '-r', window_id, '-e', geometry])
            return True
    except FileNotFoundError as e:
        # subprocess.run raises this (rather than returning a non-zero code)
        # when the wmctrl/xdotool executable itself is missing.
        tool = e.filename or 'required tool'
        print(f"Error: {tool} not installed.")
        print(f"Install with: sudo apt-get install {tool}")
        if action == 'list':
            return []
        return False if action in targeted_actions else None
    except Exception as e:
        print(f"Error in window operation: {e}")
        return False if action in targeted_actions else None
[docs]
def write(*keys):
    """
    Type text through the keyboard, either with PyAutoGUI or with Selenium.

    The call shape selects the mode:

    - ``write(text)``: PyAutoGUI types into whatever window is active.
    - ``write(driver, text)``: Selenium sends the text to the page's
      currently focused element.
    - ``write(driver, selector_type, selector, text)``: Selenium looks up
      the element and sends the text to it.

    Args:
        *keys: Variable arguments depending on mode (see above)

    Returns:
        True if successful, False otherwise

    Output:
        - Prints error message if element was not found (Selenium).
        - Prints error message if invalid arguments are provided.
        - Prints error message if any exception is raised while typing.

    Example:
        ::

            # PyAutoGUI mode (types in any active window)
            write("Hello World")
            write("user@example.com")
            # Selenium mode - type on active element in browser
            write(driver, "Search query")
            # Selenium mode - type in specific element
            write(driver, "id", "username", "john_doe")
            write(driver, "xpath", "//input[@name='email']", "user@example.com")

    Note:
        - PyAutoGUI uses typewrite() which types one character at a time.
        - Selenium uses send_keys() which types the entire string at once.
    """
    try:
        count = len(keys)

        # ---------------PyAutoGUI: a single text argument (needs a display)---------------
        if count == 1:
            if not _GUI_AVAILABLE:
                print("Error: write() requires a display.")
                return False
            pyautogui.typewrite(keys[0])
            time.sleep(1)
            return True

        # ---------------Anything that does not start with a driver is invalid---------------
        if count < 2 or not hasattr(keys[0], 'find_element'):
            print("Error: Invalid arguments for write()")
            print("Use: write(text) or write(driver, text) or write(driver, selector_type, selector, text)")
            return False

        browser = keys[0]

        # ---------------Selenium: (driver, text) types on the focused element---------------
        if count == 2:
            ActionChains(browser).send_keys(keys[1]).perform()
            return True

        # ---------------Selenium: only the 4-argument form remains valid---------------
        if count != 4:
            print("Error: Invalid number of arguments for Selenium write()")
            print("Use: write(driver, text) or write(driver, selector_type, selector, text)")
            return False

        # ---------------Selenium: (driver, selector_type, selector, text)---------------
        _, selector_type, selector, text = keys
        element = _get_web_element(browser, selector_type, selector)
        if not element:
            print(f"Element not found: {selector_type} - {selector}")
            return False
        element.send_keys(text)
        return True
    except Exception as e:
        print(f"Error writing text: {e}")
        return False
[docs]
def year():
    """
    Return the current calendar year in local time.

    Returns:
        int: Current year (e.g., 2026)

    Example:
        year()  # 2026
    """
    today = datetime.date.today()
    return today.year
[docs]
def zoom(*args):
    """
    Zoom in/out using steps or set zoom percentage.

    Modes:
        1. PyAutoGUI steps/reset: (value)
        2. Selenium steps: (driver, value) where value is -9 to +9
        3. Selenium percentage: (driver, value) where value is outside -9 to +9
        4. Selenium reset: (driver, 100) or (driver, 0)

    Args:
        *args: Variable arguments::

            - (value): PyAutoGUI zoom steps/reset
            - (driver, value): Selenium zoom steps/percentage/reset

    Returns:
        True if successful, False otherwise

    Raises:
        ValueError: If arguments are invalid, zoom value is not an integer,
            PyAutoGUI value is outside -9 to +9 (except 100),
            or Selenium percentage is less than 1%.
            All of these checks run before any zoom is attempted.

    Output:
        - Prints zoom direction and step count (PyAutoGUI).
        - Prints new zoom percentage after change (Selenium).
        - Prints confirmation when zoom is reset.
        - Prints error message if the zoom operation itself fails.

    Value Logic:
        - -9 to +9: Zoom steps (Ctrl+/Ctrl-)
        - 100 or 0: Reset to default/100%
        - Outside range (except 100): Percentage (Selenium only)

    Example:
        ::

            # PyAutoGUI (desktop apps)
            zoom(3)                 # Zoom in 3 steps
            zoom(-5)                # Zoom out 5 steps
            zoom(100) or zoom(0)    # Reset to default (Ctrl+0)
            # When zoom in/out is performed using UI (Ctrl and +/-) in Chrome,
            # the min %, zoom states % and max % follow this order:
            # (25, 33, 50, 67, 75, 80, 90, 100, 110, 125, 150, 175, 200, 250, 300, 400, 500)
            # Selenium (browser) - Steps
            zoom(driver, 3)         # Zoom in current + 3 * 10%
            zoom(driver, -5)        # Zoom out current - 5 * 10%
            # Selenium (browser) - Reset
            zoom(driver, 100)       # Reset to 100%
            zoom(driver, 0)         # Reset to 100%
            # Selenium (browser) - Percentage
            zoom(driver, 150)       # Set zoom to 150%
            zoom(driver, 75)        # Set zoom to 75%
            zoom(driver, 50)        # Set zoom to 50%
            zoom(driver, 200)       # Set zoom to 200%

    Note:
        - Selenium zoom is applied via JavaScript and is not reflected in
          the Chrome URL bar or the kebab menu zoom indicator.
        - PyAutoGUI reset (0 or 100) uses Ctrl+0 which resets to the
          application's default zoom, which may not always be 100%
          (e.g. a PDF viewer may default to 'fit to page' instead).
        - Selenium reset explicitly sets zoom to exactly 100%.
    """
    # ------------------------------------------------------------
    # DETERMINE MODE
    # ------------------------------------------------------------
    if len(args) == 1:
        # PyAutoGUI mode
        use_driver = False
        driver_obj = None
        value = args[0]
    elif len(args) == 2 and hasattr(args[0], 'execute_script'):
        # Selenium mode - driver object detected
        use_driver = True
        driver_obj = args[0]
        value = args[1]
    else:
        raise ValueError("Invalid arguments. Use zoom(value) or zoom(driver, value)")
    # ------------------------------------------------------------
    # VALIDATE (kept outside the try block below, whose broad handler
    # would otherwise swallow these documented ValueErrors)
    # ------------------------------------------------------------
    if not isinstance(value, int):
        raise ValueError("Zoom value must be an integer")
    # PyAutoGUI restrictions
    if not use_driver and value != 100 and abs(value) > 9:
        raise ValueError("PyAutoGUI mode supports steps (-9 to +9) or reset (100) or (0)")
    # Selenium: anything below -9 is outside the step range, so it would be
    # treated as a percentage, and a percentage cannot be below 1%
    if use_driver and value < -9:
        raise ValueError("Zoom percentage must be at least 1%")
    try:
        # ------------------------------------------------------------
        # SELENIUM MODE
        # needs: nothing : works on all platforms
        # ------------------------------------------------------------
        if use_driver:
            # Special case: 100 or 0 = Reset
            if value == 100 or value == 0:
                driver_obj.execute_script("document.body.style.zoom='100%'")
                # "reset to 100%" here because Selenium explicitly sets zoom to 100%
                # unlike PyAutoGUI's Ctrl+0 which resets to the default zoom level
                # of whatever application is currently active
                print("Zoom reset to 100%")
                return True
            # Steps: -9 to +9
            if -9 <= value <= 9:
                # Get current zoom level
                current_zoom = driver_obj.execute_script("return document.body.style.zoom || '100%'")
                current_zoom = int(current_zoom.replace('%', '')) if current_zoom else 100
                if value > 0:
                    new_zoom = current_zoom + (value * 10)  # each step = 10%
                    driver_obj.execute_script(f"document.body.style.zoom='{new_zoom}%'")
                    print(f"Zoomed in {value} step(s) - zoom: {new_zoom}%")
                else:
                    steps = abs(value)
                    new_zoom = current_zoom - (steps * 10)  # each step = 10%
                    new_zoom = max(10, new_zoom)  # minimum 10% zoom
                    driver_obj.execute_script(f"document.body.style.zoom='{new_zoom}%'")
                    print(f"Zoomed out {steps} step(s) - zoom: {new_zoom}%")
                return True
            # Percentage: above +9 (excluding 100); lower bound validated above
            driver_obj.execute_script(f"document.body.style.zoom='{value}%'")
            print(f"Zoom set to {value}%")
            return True
        # ------------------------------------------------------------
        # PYAUTOGUI MODE (desktop apps)
        # needs: display
        # ------------------------------------------------------------
        if not _GUI_AVAILABLE:
            print("Error: zoom() requires a display.")
            return False
        # Special case: 100 or 0 = Reset
        if value == 100 or value == 0:
            pyautogui.hotkey('ctrl', '0')
            # "reset to default" not "reset to 100%" because Ctrl+0 resets to
            # the application's default zoom, which may not always be 100%
            # e.g. a PDF viewer may default to "fit to page" instead of 100%
            print("Zoom reset to default.")
            return True
        # Steps: -9 to +9
        if value > 0:
            # Zoom in
            for _ in range(value):
                pyautogui.hotkey('ctrl', '+')
                time.sleep(0.3)
            print(f"Zoomed in {value} step(s)")
        else:
            # Zoom out
            steps = abs(value)
            for _ in range(steps):
                pyautogui.hotkey('ctrl', '-')
                time.sleep(0.3)
            print(f"Zoomed out {steps} step(s)")
        return True
    except Exception as e:
        print(f"Error zooming: {e}")
        return False