Source code for autocore._lib

# AutoCore - Automate Core Actions
# Author: Ash
# GitHub: https://github.com/AshAutomates/AutoCore
# Docs: https://autocore.readthedocs.io
# PyPI: https://pypi.org/project/autocore
# Supports: Windows, Linux


#---------------standard library imports---------------

import atexit
import configparser
import csv
import datetime
import email
import io
import json
import logging
import os
import platform
import re
import shutil
import sqlite3
import subprocess
import sys
import tempfile
import time
import traceback
import warnings
import wave
import xml.etree.ElementTree as et
from difflib import get_close_matches
from logging.handlers import RotatingFileHandler
from pathlib import Path

#---------------networking---------------
import requests
from bs4 import BeautifulSoup

#------------------------------------------------------------
# Image Processing
# Safe on all platforms including headless servers
# PIL.Image does not need a display : only PIL.ImageTk does
#------------------------------------------------------------
import numpy as np
from PIL import Image

#---------------Selenium------------------------
import undetected_chromedriver as uc
from selenium.common.exceptions import *
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

#--------------- Windows Only---------------
if platform.system() == "Windows":
    import win32con
    import win32gui

#------------------------------------------------------------
# Document Processing
# Safe on all platforms including headless servers
#------------------------------------------------------------
from PyPDF2 import PdfReader
from docx import Document
from openpyxl import Workbook, load_workbook
from pptx import Presentation
import ebooklib
from ebooklib import epub
from odf import teletype
from odf.opendocument import load as odf_load
from odf.text import P
import extract_msg
from striprtf.striprtf import rtf_to_text
import yaml

#------------------------------------------------------------
# GUI (requires display)
# Needs: X display server on Linux
# Works on: Linux desktop, Windows, macOS
# Fails on: Linux headless servers, Docker, CI/CD, SSH sessions
# Equivalent of running: export DISPLAY=:0 && xhost +local:
#------------------------------------------------------------
if platform.system() == "Linux":
    if "DISPLAY" not in os.environ:
        os.environ["DISPLAY"] = ":0"
    if os.system("xhost +local: > /dev/null 2>&1") == 0:
        import tkinter as tk
        from PIL import ImageTk
        import pyautogui

        # Enable PyAutoGUI fail-safe: move mouse to any screen corner to abort script
        pyautogui.FAILSAFE = True
        _GUI_AVAILABLE = True
    else:
        tk = None
        ImageTk = None
        pyautogui = None
        _GUI_AVAILABLE = False
else:
    import tkinter as tk
    from PIL import ImageTk
    import pyautogui

    pyautogui.FAILSAFE = True
    _GUI_AVAILABLE = True

#---------------------------------------------------------------------------
# Audio (requires sound system)
# Works on: Linux desktop, Windows
# Fails on: Linux headless servers with no audio drivers
# piper-tts is loaded lazily inside say() : we just check
# here whether the platform has a usable audio system at all
#
# Linux check 1 : aplay command is available (alsa-utils installed)
# Linux check 2 : at least one real audio playback device exists
#                 on Ubuntu Server with no audio hardware,
#                 'aplay -l' returns non-zero with "no soundcards found"
#                 on Ubuntu Desktop / Linux Mint with real audio hardware,
#                 'aplay -l' returns zero : so _AUDIO_AVAILABLE = True
#---------------------------------------------------------------------------
if platform.system() == "Windows":
    _AUDIO_AVAILABLE = True  # winsound is always available on Windows
elif platform.system() == "Linux":
    aplay_exists = os.system("aplay --version > /dev/null 2>&1") == 0  # alsa-utils installed?
    audio_device_exists = os.system("aplay -l > /dev/null 2>&1") == 0  # real audio hardware present?
    _AUDIO_AVAILABLE = aplay_exists and audio_device_exists
else:
    _AUDIO_AVAILABLE = False  # unsupported platform

#------------------------------------------------------------
# Clipboard (requires clipboard manager)
# Works on: Linux desktop, Windows, macOS
# Fails on: Linux headless servers with no clipboard manager
#------------------------------------------------------------
try:
    import pyperclip

    _CLIPBOARD_AVAILABLE = True
except Exception:
    pyperclip = None
    _CLIPBOARD_AVAILABLE = False

# print current working directory so user knows where files will be saved
print(f"Current Working Directory: {os.getcwd()}")

# to avoid 'RuntimeError: maximum recursion depth exceeded'
sys.setrecursionlimit(1500)

# Global variables to track logging state
_log_file_handler: RotatingFileHandler | None = None
_original_stdout = None
_original_stderr = None
_log_folder: Path = Path("logs")
_script_had_error = False  # Track if unhandled exception occurred


def _preprocess_for_ocr(image):
    """
    Preprocesses an image to improve OCR accuracy before passing to EasyOCR.

    Args:
        image: numpy array of the image in RGB format

    Returns:
        numpy array: Cleaned binary image ready for EasyOCR

    Note:
        - Converts to grayscale to remove color noise.
        - Upscales 2x using cubic interpolation for better character recognition.
        - Applies denoising to remove background noise.
        - Applies Otsu thresholding to produce a clean black and white image.
        - Applies morphological closing to fill small gaps in characters.
        - Lazily imports cv2 to avoid slow startup when running 'from autocore import *'.
    """
    import cv2  # pip install opencv-python

    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    gray_image = cv2.resize(gray_image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    denoised = cv2.fastNlMeansDenoising(gray_image, h=10)
    threshold_img = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    cleaned = cv2.morphologyEx(threshold_img, cv2.MORPH_CLOSE, kernel)
    return cleaned


def _click_word_by_ocr(word_to_search, occurrences_to_click, button='left'):
    """
    Click on text found via OCR on screen.

    Args:
        word_to_search: Text to search for on screen
        occurrences_to_click: 0 = click all occurrences, N = click Nth occurrence only
        button: 'left' for left-click (default), 'right' for right-click

    Returns:
        True if any click happened, False otherwise

    Output:
        - Prints how many occurrences were found and which one(s) were clicked.
        - Prints a message if the requested occurrence number does not exist.
        - Prints a message if the text was not found on screen.

    Note:
        - Used internally by click() and click_right().
        - Cross-platform compatible (Windows, Linux).

    """

    import cv2  # pip install opencv-python

    click_count = 0

    try:
        # Get shared OCR reader
        reader = _get_ocr_reader()

        # Capture screen using pyautogui (cross-platform)
        screenshot_img = pyautogui.screenshot()

        # Convert PIL image to numpy array
        image = np.array(screenshot_img)

        # Convert RGB to BGR for OpenCV
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cleaned = _preprocess_for_ocr(image)

        # Extract text with bounding boxes using EasyOCR
        results = reader.readtext(cleaned)

        # Filter matching text and collect positions
        matches = []
        for (bbox, text, confidence) in results:
            if word_to_search.lower() in text.lower():
                # Calculate center point (adjust for 2x upscaling)
                center_x = int((bbox[0][0] + bbox[2][0]) / 2 / 2)
                center_y = int((bbox[0][1] + bbox[2][1]) / 2 / 2)
                matches.append((center_x, center_y, text))

        # Sort matches by position (top-to-bottom, then left-to-right)
        matches.sort(key=lambda m: (m[1], m[0]))

        total_matches = len(matches)

        # Handle case when no matches found
        if total_matches == 0:
            print(f"'{word_to_search}' not found on screen")
            return False

        # Determine click action based on button parameter
        click_action = pyautogui.rightClick if button == 'right' else pyautogui.click
        action_name = "right-clicking" if button == 'right' else "clicking"

        # Click on matches based on occurrences_to_click
        if occurrences_to_click == 0:
            # Click all occurrences
            if total_matches == 1:
                print(f"1 occurrence of '{word_to_search}' found, {action_name} it")
            else:
                print(f"{total_matches} occurrences of '{word_to_search}' found, {action_name} all")
            for (x, y, text) in matches:
                click_action(x, y)
                click_count += 1
        elif 0 < occurrences_to_click <= total_matches:
            # Click specific occurrence (1-indexed)
            if total_matches == 1:
                print(f"1 occurrence of '{word_to_search}' found, {action_name} it")
            else:
                print(
                    f"{total_matches} occurrences of '{word_to_search}' found, {action_name} occurrence #{occurrences_to_click}")
            x, y, text = matches[occurrences_to_click - 1]
            click_action(x, y)
            click_count = 1
        else:
            # Requested occurrence doesn't exist
            if total_matches == 1:
                print(
                    f"1 occurrence of '{word_to_search}' found on screen, but you tried to {action_name[:-3]} occurrence #{occurrences_to_click} which doesn't exist")
            else:
                print(
                    f"{total_matches} occurrences of '{word_to_search}' found on screen, but you tried to {action_name[:-3]} occurrence #{occurrences_to_click} which doesn't exist")
            return False

        # Return True if any click happened
        return click_count > 0

    except Exception as e:
        print(f"Error in OCR click: {e}")
        return False


def _get_ocr_reader():
    """
    Returns a single shared OCR reader instance, creating it only once on first call
    and reusing it across all subsequent calls.

    Output:
        - Prints initialization message on first run.
        - Prints confirmation when OCR engine is ready.

    Note:
        - Lazily imports easyocr inside the function to avoid slow startup time
          when running 'from autocore import *' : easyocr is only loaded when
          OCR is actually needed.
        - Tries GPU first, falls back to CPU if GPU is unavailable.
        - Shared across all OCR functions (click, read, etc.).
        - First run downloads OCR models (~100MB).
        - Subsequent runs load instantly from cache.

    Returns:
        easyocr.Reader: Shared OCR reader instance
    """

    # Third-party imports - OCR
    import easyocr  # pip install "numpy<2" easyocr

    if not hasattr(_get_ocr_reader, 'reader'):
        try:
            print("Initializing OCR engine (first run downloads models, please wait)...")
            # Try GPU first
            _get_ocr_reader.reader = easyocr.Reader(['en'], gpu=True, verbose=False)
        except:
            # Fall back to CPU if GPU fails
            _get_ocr_reader.reader = easyocr.Reader(['en'], gpu=False, verbose=False)
        print("OCR engine ready.")
    return _get_ocr_reader.reader


def _get_web_element(driver_obj, selector_type, selector):
    """
    Locates and returns a single web element using Selenium.

    Args:
        driver_obj: Selenium WebDriver instance
        selector_type: Type of selector ('id', 'xpath', 'class', 'name', 'css', 'tag', 'text', 'partial')
        selector: Selector value/string

    Example:
        ::

            element = _get_web_element(driver, 'id', 'submit-button')
            element = _get_web_element(driver, 'xpath', '//button[@type="submit"]')
            element = _get_web_element(driver, 'class', 'btn-primary')

    Note:
        - Used internally by click(), click_right(), write(), copy(), wait() and other functions.
        - 'text' and 'partial' selector types are case-sensitive.
        - Returns None if element is not found instead of raising an exception.

    Returns:
        WebElement if found, None if not found
    """
    try:
        if selector_type == "id":
            return driver_obj.find_element(By.ID, selector)
        elif selector_type == "xpath":
            return driver_obj.find_element(By.XPATH, selector)
        elif selector_type == "class":
            return driver_obj.find_element(By.CLASS_NAME, selector)
        elif selector_type == "name":
            return driver_obj.find_element(By.NAME, selector)
        elif selector_type == "css":
            return driver_obj.find_element(By.CSS_SELECTOR, selector)
        elif selector_type == "tag":
            return driver_obj.find_element(By.TAG_NAME, selector)
        elif selector_type == "text":  # remember, text search is case-sensitive
            return driver_obj.find_element(By.LINK_TEXT, selector)
        elif selector_type == "partial":  # remember, partial text search is case-sensitive
            return driver_obj.find_element(By.PARTIAL_LINK_TEXT, selector)
        else:
            return None
    except NoSuchElementException:
        return None


class _CustomRotatingFileNameHandler(RotatingFileHandler):
    """
    Custom log rotation handler that extends RotatingFileHandler with two behaviours:

    1. Renames rotated files from log.txt.1 format to log_part_1.txt format
       to keep the .txt extension visible and readable in file explorers.
    2. Automatically deletes the oldest log files when the total logs folder
       size exceeds 100MB, keeping disk usage under control.

    Used internally by log_setup().
    """

    def rotation_filename(self, default_name):
        """
        Converts default rotation filename format to a cleaner format.

        Example:
            log_script.txt.1  -->  log_script_part_1.txt
        """
        match = re.search(r'\.txt\.(\d+)$', default_name)
        if match:
            part_num = match.group(1)
            return default_name.replace(f'.txt.{part_num}', f'_part_{part_num}.txt')
        return default_name

    def doRollover(self):
        """
        Performs log rotation and triggers cleanup of old logs if folder exceeds 100MB.
        """
        # Do the normal rotation first
        super().doRollover()

        # Now cleanup old logs if folder exceeds 100MB
        self._cleanup_old_logs()

    def _cleanup_old_logs(self):
        """
        Deletes the oldest log files in the logs folder when total size exceeds 100MB,
        removing files one by one until the folder is back under 100MB.

        Output:
            - Prints name and remaining folder size after each deleted file.
            - Prints warning if a file could not be deleted.
            - Prints warning if cleanup itself fails.
        """
        try:
            log_folder = Path("logs")
            if not log_folder.exists():
                return

            log_files = []
            total_size = 0

            for log_file in log_folder.glob("log_*.txt*"):
                size = log_file.stat().st_size
                mtime = log_file.stat().st_mtime
                log_files.append((log_file, size, mtime))
                total_size += size

            # Convert to MB
            total_size_mb = total_size / (1024 * 1024)
            max_total_mb = 100

            # Keep deleting oldest files until under 100MB
            if total_size_mb > max_total_mb:
                # Sort by modification time (oldest first)
                log_files.sort(key=lambda x: x[2])

                for log_file, size, _ in log_files:
                    if total_size_mb <= max_total_mb:
                        break

                    try:
                        log_file.unlink()  # deletes the file from the filesystem.
                        total_size_mb -= size / (1024 * 1024)
                        print(f"Deleted old log file: {log_file.name} (folder size: {total_size_mb:.1f}MB)")
                    except Exception as e:
                        print(f"Warning: Could not delete {log_file.name}: {e}")

        except Exception as e:
            print(f"Warning: Log cleanup failed: {e}")


class _LogCapture:
    """
    Intercepts stdout and stderr to mirror all print statements to both
    the terminal and the log file simultaneously.

    Used internally by log_setup() to redirect sys.stdout and sys.stderr.

    Note:
        - Uses a guard flag (_logging) to prevent infinite recursion that would
          occur if the logger itself triggers a write() call.
        - flush() is implemented to satisfy the stream interface expected by
          sys.stdout and sys.stderr.
    """

    def __init__(self, original_stream, logger, level):
        """
        Args:
            original_stream: The original sys.stdout or sys.stderr stream
            logger: The logging.Logger instance to write to
            level: Logging level (logging.INFO for stdout, logging.ERROR for stderr)
        """

        self.original_stream = original_stream
        self.logger = logger
        self.level = level
        self._logging = False  # guard flag to prevent infinite recursion

    def write(self, message):
        """
        Writes message to both terminal and log file.
        Skips empty messages and guards against recursive calls.
        """

        self.original_stream.write(message)
        self.original_stream.flush()

        # Write to log file (strip to avoid double newlines)
        # guard flag prevents recursive calls when logger itself triggers write()
        if message.strip() and not self._logging:
            self._logging = True  # block any further recursive calls
            try:
                self.logger.log(self.level, message.rstrip())
            finally:
                self._logging = False  # always release guard even if exception occurs

    def flush(self):
        """
        Flushes the original stream to satisfy the stream interface.
        """
        self.original_stream.flush()


[docs] def browser(url, headless=False, timeout=30, cookie_path=None): """ Initialize and return a browser instance for web automation. Args: url: Target URL to navigate to headless: Run browser in headless mode (default: False) timeout: Maximum seconds to wait for elements to appear (default: 30) cookie_path: Path to cookies JSON file (optional) :: - Cookies MUST be in JSON format - Export from Chrome using "Cookie-Editor" extension - Cookie domain must match the target URL Returns: WebDriver: Browser instance or None if initialization fails Example: :: # Basic usage driver = browser('https://google.com') click(driver, 'id', 'search-button') # Slow-loading site driver = browser('https://slow-site.gov', timeout=90) click(driver, 'id', 'submit-btn') # Waits up to 90s # Fast site testing driver = browser('https://fast-site.com', timeout=5) click(driver, 'id', 'login-btn') # Fails fast in 5s # With cookies driver = browser('https://site.com', cookie_path='cookies.json') # Headless mode driver = browser('https://google.com', headless=True) # Trigger a download and wait for it driver = browser('https://example.com') click(driver, 'id', 'download-button') wait_download(download_dir=driver.download_dir) Note: Uses undetected-chromedriver (uc) to bypass bot detection. Requires Google Chrome to be installed. Windows: winget install Google.Chrome Linux (Ubuntu/Debian/Mint): wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb sudo dpkg -i google-chrome-stable_current_amd64.deb sudo apt-get install -f -y Linux (RHEL/CentOS/Fedora): wget https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm sudo rpm -i google-chrome-stable_current_x86_64.rpm """ # Auto-add https:// if protocol is missing if not url.startswith(('http://', 'https://')): url = 'https://' + url print(f"Protocol not specified. Using: {url}") # Initialize options for Chrome options = uc.ChromeOptions() # --------------- HEADLESS MODE --------------- if headless: options.add_argument("--headless=new") options.add_argument("--window-size=2560,1440") options.add_argument("--disable-gpu") # disable GPU (prevents SwiftShader WebGL signal) options.add_argument("--enable-features=OverlayScrollbar") # mimics real Chrome scrollbar behavior # --------------- CHROME VERSION --------------- # Fetch installed Chrome version dynamically to keep user-agent current. # A matching Chrome version in user-agent makes the browser look more like a real user. # Initialize to None so major_version check below does not raise NameError if detection fails. chrome_version = None try: system = platform.system() if system == "Windows": result = subprocess.run( ['reg', 'query', 'HKEY_CURRENT_USER\\Software\\Google\\Chrome\\BLBeacon', '/v', 'version'], capture_output=True, text=True) chrome_version = result.stdout.strip().split()[-1] elif system == "Linux": result = subprocess.run(['google-chrome', '--version'], capture_output=True, text=True) chrome_version = result.stdout.strip().split()[-1] user_agent = f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{chrome_version} Safari/537.36" except Exception: # If Chrome version detection fails, use a generic user-agent without version number user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari/537.36" # Set a user-agent string to make the automated browser look like a regular Chrome browser options.add_argument(f"--user-agent={user_agent}") # Additional options to enhance realism and disable Selenium detection options.add_argument('--start-maximized') # Start browser maximized options.add_argument("--safebrowsing-disable-download-protection") # allow unverified downloads options.add_argument("--disable-features=InsecureDownloadWarnings") # disable insecure download warnings options.add_argument("--disable-features=DownloadBubble") # disable download bubble UI options.add_argument("--no-sandbox") # disable sandbox restrictions options.add_argument("--lang=en-US") # set language to avoid empty navigator.languages options.add_argument("--disable-translate") # disable Google Translate popup options.add_argument("--no-first-run") # skip first-run welcome popups options.add_argument("--no-default-browser-check") # skip default browser prompt # --------------- DOWNLOAD DIRECTORY --------------- # Resolve download directory using same priority as wait_download() # Pre-create it before Chrome launches so Chrome finds and uses it if os.getenv('DOWNLOAD_DIR'): download_dir = os.getenv('DOWNLOAD_DIR') elif os.path.exists('/.dockerenv'): download_dir = '/downloads' else: download_dir = str(Path.home() / 'Downloads') os.makedirs(download_dir, exist_ok=True) # --------------- CHROME PREFERENCES --------------- # Set preferences to avoid unnecessary pop-ups and block notifications prefs = { "download.default_directory": download_dir, # explicitly set download folder "profile.default_content_setting_values.notifications": 2, # block browser notification popups 'credentials_enable_service': False, # disable save password popup 'profile': {'password_manager_enabled': False}, # disable password manager completely 'profile.password_manager_leak_detection': False, # breach popup "profile.default_content_setting_values.notifications": 2, # block notification popups "translate.enabled": False, # disable translate bar "autofill.profile_enabled": False, # disable autofill suggestions "autofill.credit_card_enabled": False, # disable credit card autofill "safebrowsing.enabled": False, # disable safe browsing to allow downloads "download.prompt_for_download": False, # no download confirmation prompt "download.directory_upgrade": True, # allow download directory changes "plugins.always_open_pdf_externally": True, # download PDFs instead of opening in browser "safebrowsing_for_trusted_sources_enabled": False, # disable safe browsing for trusted sources "safebrowsing.disable_download_protection": True, # disable download protection completely } options.add_experimental_option("prefs", prefs) # --------------- INITIALIZE DRIVER --------------- try: # Extract major version from detected Chrome version to prevent ChromeDriver mismatch major_version = int(chrome_version.split('.')[0]) if chrome_version else None # Pass major version to force matching ChromeDriver download driver_instance = uc.Chrome(options=options, version_main=major_version) except Exception as e: print(f"Error initializing Chrome Driver: {e}") return None # --------------- BOT DETECTION PATCHES --------------- # Inject JS to patch remaining bot detection signals driver_instance.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { "source": """ // Fix navigator.webdriver at prototype level Object.defineProperty(navigator, 'webdriver', { get: () => false, configurable: true, enumerable: true }); try { delete navigator.__proto__.webdriver; } catch(e) {} // Fix plugins to return proper PluginArray-like object const mockPlugins = ['Chrome PDF Plugin', 'Chrome PDF Viewer', 'Native Client'].map(name => { const plugin = Object.create(Plugin.prototype); Object.defineProperty(plugin, 'name', {get: () => name}); Object.defineProperty(plugin, 'filename', {get: () => name.toLowerCase().replace(/ /g, '-') + '.dll'}); Object.defineProperty(plugin, 'description', {get: () => name}); Object.defineProperty(plugin, 'length', {get: () => 0}); return plugin; }); Object.defineProperty(navigator, 'plugins', { get: () => { const arr = Object.create(PluginArray.prototype); mockPlugins.forEach((p, i) => arr[i] = p); Object.defineProperty(arr, 'length', {get: () => mockPlugins.length}); arr.item = i => mockPlugins[i]; arr.namedItem = name => mockPlugins.find(p => p.name === name) || null; arr.refresh = () => {}; return arr; } }); // Fix permissions to return 'prompt' instead of 'denied' const originalQuery = window.navigator.permissions.query.bind(window.navigator.permissions); window.navigator.permissions.query = (parameters) => { const permissionsToSpoof = ['notifications', 'push', 'midi', 'camera', 'microphone', 'geolocation']; if (permissionsToSpoof.includes(parameters.name)) { return Promise.resolve({ state: 'prompt', permission: 'prompt', status: 'prompt', onchange: null }); } return originalQuery(parameters); }; // Also patch inside iframes if (window.top !== window.self) { window.navigator.permissions.query = (parameters) => { return Promise.resolve({state: 'prompt', permission: 'prompt', onchange: null}); }; } // Fix navigator.languages Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']}); // Fix navigator.mimeTypes Object.defineProperty(navigator, 'mimeTypes', {get: () => [ {type: 'application/pdf'}, {type: 'application/x-nacl'}, ]}); // Fix window.chrome window.chrome = { webstore: { onInstallStageChanged: {}, onDownloadProgress: {}, install: function() {}, constructor: function() {} }, runtime: { onConnect: null, onMessage: null, connect: function() {}, sendMessage: function() {}, id: undefined }, loadTimes: function() {}, csi: function() {}, app: { isInstalled: false, InstallState: {DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed'}, RunningState: {CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running'} } }; // Fix WebGL renderer const getParameter = WebGLRenderingContext.prototype.getParameter; WebGLRenderingContext.prototype.getParameter = function(parameter) { if (parameter === 37445) return 'Intel Inc.'; if (parameter === 37446) return 'Intel Iris OpenGL Engine'; return getParameter.call(this, parameter); }; """ }) # Set an implicit wait for elements to be found driver_instance.implicitly_wait(timeout) # Load a blank page to initialize the driver properly before cookie injection driver_instance.get("about:blank") # --------------- COOKIES --------------- # Load cookies from the specified path, if available. if cookie_path and os.path.exists(cookie_path): # Load target page to inject cookie driver_instance.get(url) try: with open(cookie_path, "r", encoding="utf-8") as f: cookies = json.load(f) for cookie in cookies: # Remove fields added by browser extensions that Selenium does not support cookie.pop("sameSite", None) cookie.pop("hostOnly", None) cookie.pop("session", None) cookie.pop("storeId", None) # Selenium rejects cookies with a leading dot in the domain, while browsers accept them, so the dot must be removed for compatibility. if cookie.get("domain", "").startswith("."): cookie["domain"] = cookie["domain"][1:] driver_instance.add_cookie(cookie) # Reload page so cookies show effect driver_instance.get(url) except Exception as e: print(f"Cookie loading failed: {e}") return None else: # No cookies provided so single page load is enough driver_instance.get(url) # Suppress undetected_chromedriver's __del__ cleanup error on exit driver_instance.__class__.__del__ = lambda self: None # Store resolved download directory so caller can pass it to wait_download() driver_instance.download_dir = download_dir # Return the driver instance - user can name it anything! return driver_instance
[docs] def click(*where): """ Performs left-click based on different input types. Modes: 1. Image matching: Click on visual element 2. OCR text matching: Click on text found on screen (with nth occurrence support) 3. Coordinates: Click at specific x, y position 4. Color matching: Click on specific color in region 5. Selenium web element: Click on element in browser Args: *where: Variable arguments depending on click mode Returns: True if successful, False otherwise Example: :: # Image matching click('button.png') # OCR text matching click('Submit') # Click first occurrence click('Submit', 2) # Click 2nd occurrence click('Login', 0) # Click all occurrences # Coordinates click(100, 200) # Color matching in region click(x1, y1, x2, y2, r, g, b) # Find and click color click(x1, y1, x2, y2, r, g, b, tolerance) # With tolerance # Selenium (pass driver object first) click(driver, 'id', 'submit-button') click(driver, 'xpath', '//button[@id="submit"]') click(driver, 'class', 'btn-primary') click(driver, 'name', 'username') click(driver, 'css', 'button.submit') click(driver, 'tag', 'button') click(driver, 'text', 'Click Here') click(driver, 'partial', 'Click') """ import cv2 # --------------- SELENIUM --------------- if len(where) > 0 and hasattr(where[0], 'find_element'): driver_obj = where[0] if len(where) < 3: print("Error: Selenium click requires 3 arguments: click(driver, selector_type, selector)") return False selector_type = where[1] selector = where[2] try: element = _get_web_element(driver_obj, selector_type, selector) if element: driver_obj.execute_script("arguments[0].scrollIntoView({block: 'center'});", element) try: element.click() except ElementClickInterceptedException: # Fall back to JavaScript click when element is blocked by an overlay (e.g. ads) driver_obj.execute_script("arguments[0].click();", element) return True else: print(f"Element not found: {selector_type} - {selector}") return False except NoSuchElementException: print(f"Element of {selector_type} - {selector} not found.") return False except Exception as e: print(f"Error clicking element: {e}") return False # --------------- PYAUTOGUI --------------- if not _GUI_AVAILABLE: print("Error: click() requires a display.") return False # --------------- IMAGE MATCHING / OCR TEXT --------------- elif len(where) == 1: if '.' in where[0]: pyautogui.click(where[0]) return True else: result = _click_word_by_ocr(where[0], 1, button='left') return result # --------------- COORDINATES / OCR WITH OCCURRENCE --------------- elif len(where) == 2: if isinstance(where[0], int) and isinstance(where[1], int): pyautogui.click(where[0], where[1]) return True elif isinstance(where[0], str) and isinstance(where[1], int): result = _click_word_by_ocr(where[0], where[1], button='left') return result else: print("Error: Invalid arguments for click()") return False # --------------- COLOR MATCHING IN REGION --------------- elif len(where) in [7, 8]: x_from, y_from, x_to, y_to, r, g, b = where[:7] tolerance = where[7] if len(where) == 8 else 0 try: screenshot_img = pyautogui.screenshot(region=(x_from, y_from, x_to - x_from, y_to - y_from)) screenshot_img = np.array(screenshot_img) screenshot_img = cv2.cvtColor(screenshot_img, cv2.COLOR_RGB2BGR) lower = np.array([b - tolerance, g - tolerance, r - tolerance]) upper = np.array([b + tolerance, g + tolerance, r + tolerance]) mask = cv2.inRange(screenshot_img, lower, upper) points = cv2.findNonZero(mask) if points is not None: click_x, click_y = points[0][0] pyautogui.click(x_from + click_x, y_from + click_y) print(f'Pixel found and clicked at ({x_from + click_x}, {y_from + click_y}).') return True print('Pixel not found.') return False except Exception as e: print(f"Error during color search: {e}") return False # ---------------Invalid Arguments--------------- else: print("Error: Invalid arguments for click()") return False
[docs] def click_right(*where): """ Performs right-click (context menu) based on different input types. Modes: 1. Image matching: Right-click on visual element 2. OCR text matching: Right-click on text found on screen (with nth occurrence support) 3. Coordinates: Right-click at specific x, y position 4. Color matching: Right-click on specific color in region 5. Selenium web element: Right-click on element in browser Args: *where: Variable arguments depending on click mode Returns: True if successful, False otherwise Example: :: # Image matching click_right('button.png') # OCR text matching click_right('Submit') # Right-click first occurrence click_right('Submit', 2) # Right-click 2nd occurrence click_right('Login', 0) # Right-click all occurrences # Coordinates click_right(100, 200) # Color matching in region click_right(x1, y1, x2, y2, r, g, b) # Find and right-click color click_right(x1, y1, x2, y2, r, g, b, tolerance) # With tolerance # Selenium (pass driver object first) click_right(driver, 'id', 'submit-button') click_right(driver, 'xpath', '//button[@id="submit"]') click_right(driver, 'class', 'btn-primary') click_right(driver, 'name', 'username') click_right(driver, 'css', 'button.submit') click_right(driver, 'tag', 'button') click_right(driver, 'text', 'Click Here') click_right(driver, 'partial', 'Click') """ import cv2 # --------------- SELENIUM --------------- if len(where) > 0 and hasattr(where[0], 'find_element'): driver_obj = where[0] if len(where) < 3: print("Error: Selenium click_right requires 3 arguments: click_right(driver, selector_type, selector)") return False selector_type = where[1] selector = where[2] try: element = _get_web_element(driver_obj, selector_type, selector) if element: driver_obj.execute_script("arguments[0].scrollIntoView({block: 'center'});", element) ActionChains(driver_obj).context_click(element).perform() return True else: print(f"Element not found: {selector_type} - {selector}") return False except NoSuchElementException: print(f"Element of {selector_type} - {selector} not found.") return False except Exception as e: print(f"Error right-clicking element: {e}") return False # --------------- PYAUTOGUI --------------- if not _GUI_AVAILABLE: print("Error: click_right() requires a display.") return False # --------------- IMAGE MATCHING / OCR TEXT --------------- elif len(where) == 1: if '.' in where[0]: pyautogui.rightClick(where[0]) return True else: result = _click_word_by_ocr(where[0], 1, button='right') return result # --------------- COORDINATES / OCR WITH OCCURRENCE --------------- elif len(where) == 2: if isinstance(where[0], int) and isinstance(where[1], int): pyautogui.rightClick(where[0], where[1]) return True elif isinstance(where[0], str) and isinstance(where[1], int): result = _click_word_by_ocr(where[0], where[1], button='right') return result else: print("Error: Invalid arguments for click_right()") return False # --------------- COLOR MATCHING IN REGION --------------- elif len(where) in [7, 8]: x_from, y_from, x_to, y_to, r, g, b = where[:7] tolerance = where[7] if len(where) == 8 else 0 try: screenshot_img = pyautogui.screenshot(region=(x_from, y_from, x_to - x_from, y_to - y_from)) screenshot_img = np.array(screenshot_img) screenshot_img = cv2.cvtColor(screenshot_img, cv2.COLOR_RGB2BGR) lower = np.array([b - tolerance, g - tolerance, r - tolerance]) upper = np.array([b + tolerance, g + tolerance, r + tolerance]) mask = cv2.inRange(screenshot_img, lower, upper) points = cv2.findNonZero(mask) if points is not None: click_x, click_y = points[0][0] pyautogui.rightClick(x_from + click_x, y_from + click_y) print(f'Pixel found and right-clicked at ({x_from + click_x}, {y_from + click_y}).') return True print('Pixel not found.') return False except Exception as e: print(f"Error during color search: {e}") return False # ---------------Invalid Arguments--------------- else: print("Error: Invalid arguments for click_right()") return False
[docs] def copy(*where): """ Copies text from various sources: screen, clipboard, Selenium elements or web pages. Modes: 1. Active window: Copy all content from current window 2. Clipboard: Get current clipboard content 3. Screen coordinates: Click at position and copy 4. Selenium webpage: Copy entire page content 5. Selenium element: Copy element text or attribute value Args: *where: Variable arguments depending on copy mode Returns: str: Copied text or '' if nothing was copied Example: :: # Active window - Copy everything from current window # Ctrl+A, Ctrl+C from active window copy() # Clipboard # Get current clipboard content copy('clipboard') # Screen coordinates # Click at (500, 300) and copy copy(500, 300) # Selenium webpage - Copy entire page # Copy all webpage content copy(driver) # Selenium element - Copy text copy(driver, 'id', 'username-display') copy(driver, 'xpath', '//div[@class="name"]') copy(driver, 'class', 'user-info') copy(driver, 'name', 'description') copy(driver, 'css', 'p.content') copy(driver, 'tag', 'h1') copy(driver, 'text', 'Welcome') copy(driver, 'partial', 'Hello') # Selenium element - Copy attribute copy(driver, 'id', 'download-link', 'href') # Get link URL copy(driver, 'class', 'product-img', 'src') # Get image source copy(driver, 'id', 'email-field', 'value') # Get input value copy(driver, 'xpath', '//a[@id="link"]', 'title') # Get title attribute """ result = None # --------------- MODE 1 : ACTIVE WINDOW --------------- # needs: GUI + clipboard if len(where) == 0: if not _GUI_AVAILABLE: print("Error: copy() requires a display.") return '' if not _CLIPBOARD_AVAILABLE: print("Error: copy() requires a clipboard manager.") return '' pyperclip.copy('') pyautogui.hotkey('ctrl', 'a') time.sleep(1) pyautogui.hotkey('ctrl', 'c') time.sleep(1) result = pyperclip.paste().strip() # --------------- ONE ARGUMENT --------------- elif len(where) == 1: # --------------- MODE 2 : CLIPBOARD --------------- # needs: clipboard only if where[0] == 'clipboard': if not _CLIPBOARD_AVAILABLE: print("Error: copy('clipboard') requires a clipboard manager.") return '' result = pyperclip.paste().strip() # --------------- MODE 3 : SELENIUM WEBPAGE --------------- # needs: clipboard (for paste after Ctrl+C) elif hasattr(where[0], 'find_element'): if not _CLIPBOARD_AVAILABLE: print("Error: copy(driver) requires a clipboard manager.") return '' driver_obj = where[0] pyperclip.copy('') try: action = ActionChains(driver_obj) action.key_down(Keys.CONTROL).send_keys('a').key_up(Keys.CONTROL).perform() time.sleep(1) action.key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform() time.sleep(1) result = pyperclip.paste().strip() pyperclip.copy('') driver_obj.execute_script("window.getSelection().removeAllRanges();") except Exception as e: print(f"Error copying webpage content: {e}") else: print(f"Invalid argument: {where[0]}") # --------------- TWO ARGUMENTS --------------- elif len(where) == 2: # --------------- MODE 4 : SCREEN COORDINATES --------------- # needs: GUI + clipboard if isinstance(where[0], int) and isinstance(where[1], int): if not _GUI_AVAILABLE: print("Error: copy(x, y) requires a display.") return '' if not _CLIPBOARD_AVAILABLE: print("Error: copy(x, y) requires a clipboard manager.") return '' pyperclip.copy('') pyautogui.click(where[0], where[1]) time.sleep(1) pyautogui.hotkey('ctrl', 'a') time.sleep(1) pyautogui.hotkey('ctrl', 'c') time.sleep(1) result = pyperclip.paste().strip() elif hasattr(where[0], 'find_element'): print("Error: Selenium copy requires 3 arguments: copy(driver, selector_type, selector)") print("Example: copy(driver, 'id', 'username')") else: print("Invalid arguments for copy()") # --------------- MODE 5 : SELENIUM ELEMENT TEXT OR ATTRIBUTE --------------- # needs: nothing : reads directly from DOM, no clipboard needed elif len(where) in [3, 4]: if hasattr(where[0], 'find_element'): driver_obj = where[0] selector_type = where[1] selector = where[2] attribute = where[3] if len(where) == 4 else None try: element = _get_web_element(driver_obj, selector_type, selector) if element: if attribute: attr_value = element.get_attribute(attribute) result = attr_value.strip() if attr_value else '' else: result = element.text.strip() else: print(f"Element not found: {selector_type} - {selector}") except Exception as e: print(f"Error copying from element: {e}") else: print("Error: For Selenium mode, first argument must be driver object") # --------------- INVALID ARGUMENTS --------------- else: print("Invalid arguments provided for copy()") # --------------- RETURN RESULT --------------- if result == '' or result is None: print("No content returned from copy()") return '' else: return result
[docs] def csv_to_xlsx(csv_file=None, delete_csv=True): """ Converts CSV file(s) to XLSX format. Args: csv_file: Path to CSV file or None to auto-detect single CSV in current directory delete_csv: If True, deletes original CSV after conversion (default: True) Returns: str: Path of created XLSX file or None if error Output: - Prints the detected CSV filename when auto-detected. - Prints conversion result showing source and destination filenames. - Prints confirmation when original CSV is deleted. Example: :: # Auto-detect single CSV in current directory (deletes CSV by default) csv_to_xlsx() # Finds, converts and deletes CSV # Specific file (deletes CSV by default) csv_to_xlsx('data.csv') # Converts and deletes data.csv # Keep original CSV csv_to_xlsx('report.csv', delete_csv=False) # Keeps report.csv """ # Auto-detect CSV if not provided if csv_file is None: csv_files = list(Path('.').glob('*.csv')) if len(csv_files) == 0: print("Error: No CSV files found in current directory") return None elif len(csv_files) == 1: csv_file = str(csv_files[0]) print(f"Auto-detected CSV file: {csv_file}") else: print(f"Error: Multiple CSV files found ({len(csv_files)}). Please specify which one to convert:") for f in csv_files: print(f" - {f.name}") return None new_wb = Workbook() new_ws = new_wb.active try: with open(csv_file, 'r', encoding='utf-8', newline='') as f: reader = csv.reader(f) for row in reader: new_ws.append(row) xlsx_file = Path(csv_file).with_suffix(".xlsx") new_wb.save(xlsx_file) print(f"Converted: {csv_file} to {xlsx_file.name}") if delete_csv: os.remove(csv_file) print(f"Deleted: {csv_file}") return str(xlsx_file) except Exception as e: print(f"Error converting CSV to XLSX: {e}") return None
[docs] def date(): """ Get current day of month. Returns: int: Current day (1-31) Example: date() # 24 """ return time.localtime().tm_mday
[docs] def day(): """ Get current day of week. Returns: str: Day name in lowercase (monday, tuesday, wednesday, thursday, friday, saturday, sunday) Example: :: # Weekday check if day() == 'monday': print("It is Monday today.") """ days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'] return days[time.localtime().tm_wday]
[docs] def drag(*args): """ Drag from source to target. Modes: 1. PyAutoGUI screen drag: (x1, y1, x2, y2) 2. Selenium element drag: (driver, src_type, src_selector, tgt_type, tgt_selector) Args: PyAutoGUI: (x1, y1, x2, y2) Selenium: (driver, src_type, src_selector, tgt_type, tgt_selector) Returns: True if successful, False otherwise Output: - Prints drag confirmation showing source and target coordinates (PyAutoGUI). - Prints drag confirmation showing source and target selectors (Selenium). Example: :: # Screen drag (PyAutoGUI) - 2 second duration drag(100, 200, 500, 600) # Web element drag (Selenium) drag(driver, 'id', 'card-1', 'class', 'done-column') drag(driver, 'xpath', '//li[1]', 'xpath', '//li[5]') # Multiple drivers driver1 = browser('https://trello.com') driver2 = browser('https://jira.com') drag(driver1, 'id', 'task-1', 'id', 'done-column') drag(driver2, 'class', 'issue', 'class', 'backlog') """ # ------------------------------------------------------------ # MODE 1 : PYAUTOGUI SCREEN DRAG (4 integer arguments) # needs: display # ------------------------------------------------------------ if len(args) == 4 and all(isinstance(arg, int) for arg in args): if not _GUI_AVAILABLE: print("Error: drag() requires a display.") return False x1, y1, x2, y2 = args try: pyautogui.moveTo(x1, y1, 1, pyautogui.easeInOutQuad) pyautogui.dragTo(x2, y2, duration=2, button='left') print(f"Dragged from ({x1}, {y1}) to ({x2}, {y2})") return True except Exception as e: print(f"Error during drag: {e}") return False # ------------------------------------------------------------ # MODE 2 : SELENIUM ELEMENT DRAG (5 arguments, first is WebDriver) # needs: nothing : works on all platforms # ------------------------------------------------------------ elif len(args) == 5 and hasattr(args[0], 'find_element'): driver_obj = args[0] src_type = args[1] src_selector = args[2] tgt_type = args[3] tgt_selector = args[4] valid_selectors = ['id', 'xpath', 'class', 'name', 'css', 'tag', 'text', 'partial'] if src_type not in valid_selectors: raise ValueError(f"Invalid source selector type '{src_type}'. Valid: {', '.join(valid_selectors)}") if tgt_type not in valid_selectors: raise ValueError(f"Invalid target selector type '{tgt_type}'. Valid: {', '.join(valid_selectors)}") try: source = _get_web_element(driver_obj, src_type, src_selector) target = _get_web_element(driver_obj, tgt_type, tgt_selector) if not source: print(f"Source element not found: {src_type} = '{src_selector}'") return False if not target: print(f"Target element not found: {tgt_type} = '{tgt_selector}'") return False ActionChains(driver_obj).drag_and_drop(source, target).perform() print(f"Dragged element from {src_type}='{src_selector}' to {tgt_type}='{tgt_selector}'") return True except Exception as e: print(f"Error during Selenium drag: {e}") return False # ---------------Invalid Arguments--------------- else: raise ValueError( "Invalid arguments. Use drag(x1, y1, x2, y2) or drag(driver, src_type, src_selector, tgt_type, tgt_selector)")
[docs] def erase(*args): """ Erase/clear text from input fields. Modes: 1. PyAutoGUI active window: () 2. Selenium specific element: (driver, selector_type, selector) Args: *args: Variable arguments depending on mode Returns: True if successful, False otherwise Example: :: # PyAutoGUI mode (erase active window) erase() # Select all and delete (Ctrl+A, Delete) # Selenium mode (erase specific element) erase(driver, 'id', 'username') # Clear username field erase(driver, 'xpath', '//input[@name="email"]') # Clear email field erase(driver, 'class', 'search-box') # Clear search box """ try: # ------------------------------------------------------------ # MODE 1 : PYAUTOGUI ACTIVE WINDOW (no arguments) # needs: display # ------------------------------------------------------------ if len(args) == 0: if not _GUI_AVAILABLE: print("Error: erase() requires a display.") return False pyautogui.hotkey('ctrl', 'a', 'delete') print("Erased content in active window") return True # ------------------------------------------------------------ # MODE 2 : SELENIUM ELEMENT (driver + selector) # needs: nothing : works on all platforms # ------------------------------------------------------------ elif len(args) == 3 and hasattr(args[0], 'find_element'): driver_obj = args[0] selector_type = args[1] selector = args[2] element = _get_web_element(driver_obj, selector_type, selector) if element: element.clear() print(f"Cleared element: {selector_type} - {selector}") return True else: print(f"Element not found: {selector_type} - {selector}") return False # ---------------Invalid Arguments--------------- else: print("Error: Invalid arguments for erase()") print("Use: erase() or erase(driver, selector_type, selector)") return False except Exception as e: print(f"Error erasing content: {e}") return False
[docs] def find_browser(*args): """ Find text in browser using Ctrl+F (find function). Args: *args: Variable arguments depending on mode Returns: True if successful, False otherwise Output: - Prints confirmation with the searched text (PyAutoGUI). - Prints confirmation if text was found and highlighted (Selenium). - Prints a message if text was not found on the page (Selenium). Example: :: # PyAutoGUI mode (any window) find_browser('Python') # Find in active window find_browser('error message') # Find phrase # Selenium mode (browser) find_browser(driver, 'Python') # Find in Selenium browser find_browser(driver, 'contact us') # Find phrase in browser Note: - PyAutoGUI: Opens find dialog (Ctrl+F), types search term, presses Enter, then Esc. - Selenium: Uses JavaScript to highlight matching text on the page in yellow and scrolls to the first match. Removes any previous highlights before applying new ones. - Default wait time between actions is 1 second (PyAutoGUI only). """ wait_time = 1 # Default wait time (1 second) try: # ------------------------------------------------------------ # MODE 1 : SELENIUM (driver object passed) # needs: nothing : works on all platforms # ------------------------------------------------------------ if len(args) >= 2 and hasattr(args[0], 'execute_script'): driver_obj = args[0] search_text = args[1] # Use JavaScript to find and highlight text # Using raw string (r"...") to avoid escape sequence warnings script = r""" var searchText = arguments[0]; var body = document.body; var innerHTML = body.innerHTML; // Remove previous highlights innerHTML = innerHTML.replace(/<mark style="background-color: yellow;">([^<]*)<\/mark>/gi, "$1"); // Add new highlights var regex = new RegExp(searchText, "gi"); innerHTML = innerHTML.replace(regex, '<mark style="background-color: yellow;">$&</mark>'); body.innerHTML = innerHTML; // Scroll to first match var firstMatch = document.querySelector('mark'); if (firstMatch) { firstMatch.scrollIntoView({ behavior: 'smooth', block: 'center' }); return true; } return false; """ # Pass search_text as argument to avoid string interpolation issues result = driver_obj.execute_script(script, search_text) if result: print(f"[Selenium] Text '{search_text}' found and highlighted") return True else: print(f"[Selenium] Text '{search_text}' not found on page") return False # ------------------------------------------------------------ # MODE 2 : PYAUTOGUI (no driver object) # needs: display # ------------------------------------------------------------ elif len(args) == 1: if not _GUI_AVAILABLE: print("Error: find_browser() requires a display.") return False search_text = args[0] # Open browser find dialog pyautogui.hotkey('ctrl', 'f') time.sleep(wait_time) # Type search text pyautogui.typewrite(search_text) time.sleep(wait_time) # Press Enter to find pyautogui.press('enter') time.sleep(wait_time) # Close find dialog pyautogui.press('esc') time.sleep(wait_time) print(f"[PyAutoGUI] Searched for '{search_text}'") return True # ---------------Invalid Arguments--------------- else: print("Error: Invalid arguments for find_browser()") print("Use: find_browser(text) or find_browser(driver, text)") return False except Exception as e: print(f"Error during browser find: {e}") return False
[docs] def find_key(data, key): """ Recursively finds all values of a specified key in nested data structures (dictionaries, lists and tuples). Particularly useful for searching deeply nested JSON data from API responses or parsed files. Args: data: Data structure to search (dict, list or tuple) key: Key name to find Returns: list: All values found for the key (empty list if not found) Example: :: # Single occurrence data = {'name': 'John', 'age': 30} name = find_key(data, 'name')[0] # 'John' # Multiple occurrences data = { 'user': {'id': 1, 'name': 'Alice'}, 'admin': {'id': 2, 'name': 'Bob'} } ids = find_key(data, 'id') # [1, 2] # Nested lists/tuples data = {'users': [{'age': 25}, {'age': 30}]} ages = find_key(data, 'age') # [25, 30] # API response workflow response = requests.get('https://api.example.com/users').json() ids = find_key(response, 'id') # finds all 'id' values # Parsed file workflow data = json.loads(read('data.json')) hosts = find_key(data, 'host') # finds all 'host' values """ results = [] def extract(obj): """Recursively search for key in nested structure""" if isinstance(obj, dict): for k, v in obj.items(): if k == key: results.append(v) if isinstance(v, (dict, list, tuple)): extract(v) elif isinstance(obj, (list, tuple)): for item in obj: extract(item) extract(data) return results
[docs] def find_str(string, starts_after, ends_before, index=0): """ Extracts substring between two markers. Args: string: Text to search in starts_after: Start extraction after this ends_before: End extraction before this index: Which match (0=first, -1=last, 1=second, etc.) Returns: str or None: Extracted string or None if not found Example: :: # Extract version number from string text = 'Version: 1.0.5 released' version = find_str(text, 'Version: ', ' released') # version = '1.0.5' # Extract last occurrence using index=-1 text = 'User: Alice logged in. User: Bob logged in' last_user = find_str(text, 'User: ', ' logged', -1) # last_user = 'Bob' """ try: # Escape special regex characters starts_after = re.escape(starts_after) ends_before = re.escape(ends_before) # Build regex pattern pattern = f'{starts_after}(.*?){ends_before}' matches = re.findall(pattern, string) if not matches: return None try: return matches[index].strip() except IndexError: return None except re.error as e: print(f"Regex error in find_str: {e}") return None except Exception as e: print(f"Error in find_str: {e}") return None
[docs] def hour(): """ Get current hour. Returns: int: Current hour (0-23) in 24-hour format Example: hour() # 14 (2 PM) """ return time.localtime().tm_hour
[docs] def inspect(): """ Opens GUI to inspect pixel position and color with zoomed preview. Usage: 1. Click on the Pixel Inspector window to bring it into focus. 2. Move the mouse to the desired pixel. 3. Press 'ESC' to capture. Output: - Prints position and RGB/HEX values to console. - Copies 'x, y, r, g, b' to clipboard. """ # ------------------------------------------------------------ # GUARD : needs display, tkinter, pyautogui and clipboard # ------------------------------------------------------------ if not _GUI_AVAILABLE: print("Error: inspect() requires a display.") return if not _CLIPBOARD_AVAILABLE: print("Error: inspect() requires a clipboard manager.") return # Track whether the inspector window is still open window_open = True def capture_pixel(event=None): """Called when ESC is pressed. Captures current mouse position and color.""" nonlocal window_open try: # Get current mouse position x, y = pyautogui.position() # Get the RGB color of the pixel at current mouse position color = pyautogui.pixel(x, y) # Convert RGB to HEX format hex_color = f"#{color[0]:02x}{color[1]:02x}{color[2]:02x}".upper() # Format the full output string for console formatted_output = f"Pixel at ({x}, {y}): RGB {color} | HEX {hex_color}" # Format the clipboard string as 'x, y, r, g, b' clipboard_text = f"{x}, {y}, {color[0]}, {color[1]}, {color[2]}" # Print to console print(formatted_output) print(f"Copied to clipboard: {clipboard_text}") # Copy to clipboard pyperclip.copy(clipboard_text) # Mark window as closed and destroy the tkinter window window_open = False root.destroy() except Exception as e: print(f"Error capturing pixel: {e}") # noinspection PyTypeChecker def update_color_and_position(): """Called repeatedly every 100ms to update the live preview.""" # Stop updating if the window has been closed if not window_open: return try: # Get current mouse position x, y = pyautogui.position() # Get screen dimensions to validate mouse position screen_width, screen_height = pyautogui.size() # Skip update if mouse is outside screen bounds if x < 0 or y < 0 or x >= screen_width or y >= screen_height: root.after(30, update_color_and_position) return # Get RGB color of the pixel at current mouse position color = pyautogui.pixel(x, y) # Convert RGB to HEX format hex_color = f"#{color[0]:02x}{color[1]:02x}{color[2]:02x}".upper() # Update the position, RGB and HEX labels in the GUI position_label.config(text=f"Position: ({x}, {y})") color_label.config(text=f"RGB: {color}") hex_label.config(text=f"HEX: {hex_color}") # Update the color swatch to show the current pixel color color_display.config(bg=hex_color.lower()) # --- Zoomed Preview --- # Define the capture area size around the cursor (30x30 pixels) zoom_size = 30 # Calculate top-left corner of the capture region centered on cursor left = max(0, x - zoom_size // 2) top = max(0, y - zoom_size // 2) # Clamp the region so it does not go beyond screen edges left = min(left, screen_width - zoom_size) top = min(top, screen_height - zoom_size) # Take a screenshot of the small region around the cursor screenshot = pyautogui.screenshot(region=(left, top, zoom_size, zoom_size)) # Resize the region to 240x240 for 8x magnification using nearest # neighbor to keep pixels sharp and not blurred zoomed = screenshot.resize((240, 240), Image.Resampling.NEAREST) # Convert the zoomed image to a format tkinter can display photo = ImageTk.PhotoImage(zoomed) # Clear the canvas and draw the new zoomed image zoom_canvas.delete("all") zoom_canvas.create_image(120, 120, image=photo) # Keep a reference to prevent garbage collection zoom_canvas.image = photo # Draw a red crosshair at the center of the zoomed preview zoom_canvas.create_line(120, 110, 120, 130, fill='red', width=2) # vertical zoom_canvas.create_line(110, 120, 130, 120, fill='red', width=2) # horizontal except Exception as e: print(f"Pixel preview update failed: {e}") return # Schedule the next update after 100ms root.after(30, update_color_and_position) # --- Build the tkinter GUI --- # Create the main window root = tk.Tk() root.title("Pixel Inspector") root.geometry("400x560") root.configure(bg='#4D4D4D') # Bind ESC key to capture_pixel so pressing ESC triggers the capture root.bind('<Escape>', capture_pixel) # Label to show current mouse position position_label = tk.Label(root, text="Position:", font=("Helvetica", 12), bg='#4D4D4D', fg='white') position_label.pack() # Label to show current RGB values color_label = tk.Label(root, text="RGB:", font=("Helvetica", 12), bg='#4D4D4D', fg='white') color_label.pack() # Label to show current HEX value hex_label = tk.Label(root, text="HEX:", font=("Helvetica", 12), bg='#4D4D4D', fg='white') hex_label.pack() # Color swatch frame that fills with the current pixel color color_display = tk.Frame(root, height=60, width=200) color_display.pack(pady=10) # Label above the zoomed preview canvas zoom_label = tk.Label(root, text="Zoomed Preview (8x):", font=("Helvetica", 10), bg='#4D4D4D', fg='white') zoom_label.pack() # Canvas where the zoomed pixel preview is drawn zoom_canvas = tk.Canvas(root, width=240, height=240, bg='#2D2D2D', highlightthickness=2, highlightbackground='white') zoom_canvas.pack(pady=5) # Instruction label at the bottom instruction_label = tk.Label(root, text="Press 'ESC' to capture and exit", font=("Helvetica", 10), bg='#4D4D4D', fg='white') instruction_label.pack(pady=10) # Start the live update loop update_color_and_position() # Start the tkinter event loop : blocks here until window is closed root.mainloop()
[docs] def log_setup(title): """ Sets up logging and terminal styling for the script. Combines terminal setup with comprehensive logging and automatic color-coded status indication. Creates a logs folder and saves all output with timestamps. Shows output in terminal while also saving to file. Args: title: Name for both terminal title and log file Example: :: log_setup("MyScript") print("This gets logged") # ... script runs ... # Terminal turns GREEN on success or RED on crash Log file format: :: logs/log_MyScript_2026-03-26_14-30-45_IST_session-1.txt (active - newest logs) logs/log_MyScript_2026-03-26_14-30-45_IST_session-1_part_1.txt (2nd newest - rotated) logs/log_MyScript_2026-03-26_14-30-45_IST_session-1_part_2.txt (3rd newest) ... logs/log_MyScript_2026-03-26_14-30-45_IST_session-1_part_9.txt (oldest backup) Session numbering: :: session-1 : First run of this script session-2 : Second run of this script session-3 : Third run, etc. session-N : Automatically increments based on existing log files Features: - Sets terminal title and colors (blue bg, white text) - Automatic color changes: Blue to Green (success) or Blue to Red (crash) - Automatic session numbering (increments from previous runs) - Captures all print() statements - Captures all errors and exceptions - Adds timestamp to each entry - Shows output in terminal AND saves to file - Automatic file rotation (10MB per file, max 10 files = 100MB per session) - Automatic cleanup (keeps max 100MB total logs across all sessions) Note: Terminal colors change automatically based on script outcome:: Blue background : Script is running Green background : Script completed successfully Red background : Script crashed (unhandled exception) """ global _log_file_handler, _original_stdout, _original_stderr, _log_folder # ---------- TERMINAL SETUP ---------- system = platform.system() if system == "Windows": # Clear the screen os.system('cls') # Set the window title (works in both CMD and PowerShell) os.system(f'title {title}') # Try CMD color command first (Blue background, white text) result = os.system('color 1F') # If color command failed (PowerShell), use ANSI escape codes if result != 0: # Enable ANSI color support in PowerShell print("\033[97m\033[44m", end="", flush=True) elif system == "Linux": # Set the window title using ANSI escape codes print(f"\33]0;{title}\a", end="", flush=True) # Set terminal text color to bright white and background to blue # ANSI: 97 = bright white text, 44 = blue background print("\033[97m\033[44m", end="") # Clear the screen os.system('clear') elif system == "Darwin": print("Terminal colors not supported on macOS (logging will still work)") # ---------- LOGGING SETUP ---------- # Create logs folder if it doesn't exist _log_folder.mkdir(exist_ok=True) # ---------- DETERMINE SESSION NUMBER ---------- # Find all existing log files for this script title existing_logs = list(_log_folder.glob(f"log_{title}_*_session-*.txt")) # Extract session numbers from existing log files session_numbers = [] for log_file in existing_logs: # Pattern: log_title_timestamp_timezone_session-N.txt filename = log_file.stem # Get filename without extension # Extract session number using regex match = re.search(r'_session-(\d+)', filename) if match: session_numbers.append(int(match.group(1))) # Determine next session number if session_numbers: next_session = max(session_numbers) + 1 else: next_session = 1 print(f"Starting session {next_session} for '{title}'") # ---------- GET TIMESTAMP WITH TIMEZONE ---------- # Get current time with system's local timezone try: now = datetime.datetime.now().astimezone() # auto-detects system timezone time_zone_name = now.tzname() or "LOCAL" # e.g., IST, EST, PDT except Exception: now = datetime.datetime.now() time_zone_name = "LOCAL" # Create log filename with session number timestamp = now.strftime("%Y-%m-%d_%H-%M-%S") log_filename = f"log_{title}_{timestamp}_{time_zone_name}_session-{next_session}.txt" log_filepath = _log_folder / log_filename # ---------- CONFIGURE LOGGER ---------- # Configure logger logger = logging.getLogger(title) logger.setLevel(logging.DEBUG) # Remove existing handlers logger.handlers.clear() # Create file handler with rotation (10MB per file) _log_file_handler = _CustomRotatingFileNameHandler( log_filepath, maxBytes=10 * 1024 * 1024, # 10MB backupCount=9, # 10 files total (1 active + 9 backups) = 100MB encoding='utf-8' # fixes UnicodeEncodeError for special characters ) # Create formatter with timestamp formatter = logging.Formatter( '%(asctime)s -> %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) _log_file_handler.setFormatter(formatter) logger.addHandler(_log_file_handler) # Silence the root logger for the entire session so third-party libraries # that add StreamHandlers to it cannot produce duplicate "INFO:..." lines in our logs. # Setting level above CRITICAL blocks all messages at the source, # regardless of how many handlers any library adds to the root logger. # Our named logger is unaffected as it has its own independent level. logging.getLogger().handlers.clear() logging.getLogger().addHandler(logging.NullHandler()) logging.getLogger().setLevel(logging.CRITICAL + 1) # Redirect stdout and stderr _original_stdout = sys.stdout _original_stderr = sys.stderr sys.stdout = _LogCapture(_original_stdout, logger, logging.INFO) sys.stderr = _LogCapture(_original_stderr, logger, logging.ERROR) # Log start message logger.info(f"Logging started for: {title} (Session {next_session})") logger.info(f"Log file: {log_filepath}") # ---------- SETUP SUCCESS/ERROR COLOR HANDLERS ---------- global _script_had_error # Declare at start of this section # Store original exception handler original_excepthook = sys.excepthook def set_success_color(): """Change terminal to green on successful completion""" global _original_stdout, _original_stderr, _script_had_error, _log_file_handler # Don't show success if there was an error if _script_had_error: return # LOG SUCCESS MESSAGE directly to file (before restoring streams) if _log_file_handler: try: # Create a log record manually record = logging.LogRecord( name="completion", level=logging.INFO, pathname="", lineno=0, msg="Script execution completed successfully", args=(), exc_info=None ) _log_file_handler.emit(record) _log_file_handler.flush() # Ensure it's written except Exception as e: print(f"Could not write script completion status to log file: {e}") # NOW restore original stdout/stderr if _original_stdout: sys.stdout = _original_stdout sys.stderr = _original_stderr # Change color (terminal only) if platform.system() == "Windows": os.system('color 2F') # Green background, white text elif platform.system() == "Linux": print("\033[97m\033[42m", end="", flush=True) # Green bg, white text def set_error_color(exc_type, exc_value, exc_traceback): """Change terminal to red on unhandled exception""" global _original_stdout, _original_stderr, _script_had_error # Mark that an error occurred _script_had_error = True # LOG THE EXCEPTION FIRST (while stderr is still redirected to log) error_msg = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback)) print(error_msg, file=sys.stderr, end='') # NOW restore original stdout/stderr if _original_stdout: sys.stdout = _original_stdout sys.stderr = _original_stderr # Change color if platform.system() == "Windows": os.system('color 4F') # Red background, white text elif platform.system() == "Linux": print("\033[97m\033[41m", end="", flush=True) # Red bg, white text # DON'T call original_excepthook - we already printed the error! # (Calling it would print the error a second time) # Register success handler (runs on normal exit) atexit.register(set_success_color) # Register error handler (runs on unhandled exception) sys.excepthook = set_error_color
[docs] def minute(): """ Get current minute. Returns: int: Current minute (0-59) Example: minute() # 23 """ return time.localtime().tm_min
[docs] def month(): """ Get current month. Returns: int: Current month (1-12) Example: month() # 2 (February) """ return time.localtime().tm_mon
[docs] def press(*keys): """ Press keyboard keys with support for Selenium, PyAutoGUI and key combinations. Modes: 1. PyAutoGUI single key: (key) 2. PyAutoGUI key N times: (key, count) 3. PyAutoGUI key combination: (key1, key2, ...) 4. Selenium driver key: (driver, key) 5. Selenium driver key N times: (driver, key, count) 6. Selenium driver key combination: (driver, key1, key2, ...) 7. Selenium element key: (driver, selector_type, selector, key) Args: *keys: Variable arguments for key presses Returns: True if successful, False otherwise Example: :: # PyAutoGUI keys (no driver needed) press("tab") # Single key press("tab", 5) # Press 5 times press("tab", -5) # Press 5 times with shift held press("ctrl", "a") # Two-key combo press("alt", "ctrl", "z") # Three-key combo press("num5") # Numpad 5 press("volumeup") # Volume up press("mute") # Volume mute (short form) press("back") # Browser back (short form) press("forward") # Browser forward (short form) # Selenium driver keys (pass driver object) press(driver, "tab") press(driver, "tab", 5) # Press tab 5 times press(driver, "tab", -5) # Press shift+tab 5 times press(driver, "ctrl", "c") press(driver, "ctrl", "shift", "s") # Selenium element + key (driver, selector_type, selector, key) press(driver, "xpath", "//input", "enter") press(driver, "id", "username", "tab") Note: - Negative count presses the key with Shift held (e.g. Shift+Tab for reverse navigation). - PyAutoGUI-only keys (num0-9, volumeup, volumedown, mute, back, forward, etc.) are not supported in Selenium mode. - Short forms supported: 'back' for browserback, 'forward' for browserforward, 'mute' for volumemute. """ # Key mappings for special keys key_map = { 'enter': Keys.RETURN, 'tab': Keys.TAB, 'up': Keys.ARROW_UP, 'down': Keys.ARROW_DOWN, 'left': Keys.ARROW_LEFT, 'right': Keys.ARROW_RIGHT, 'alt': Keys.ALT, 'backspace': Keys.BACKSPACE, 'esc': Keys.ESCAPE, 'escape': Keys.ESCAPE, 'home': Keys.HOME, 'end': Keys.END, 'insert': Keys.INSERT, 'delete': Keys.DELETE, 'pageup': Keys.PAGE_UP, 'pagedown': Keys.PAGE_DOWN, 'shift': Keys.SHIFT, 'ctrl': Keys.CONTROL, 'control': Keys.CONTROL, 'space': Keys.SPACE, 'pause': Keys.PAUSE, 'f1': Keys.F1, 'f2': Keys.F2, 'f3': Keys.F3, 'f4': Keys.F4, 'f5': Keys.F5, 'f6': Keys.F6, 'f7': Keys.F7, 'f8': Keys.F8, 'f9': Keys.F9, 'f10': Keys.F10, 'f11': Keys.F11, 'f12': Keys.F12, 'windows': Keys.COMMAND, 'command': Keys.COMMAND } # PyAutoGUI-only keys (not supported in Selenium) pyautogui_only_keys = { 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 'numlock', 'volumeup', 'volumedown', 'volumemute', 'playpause', 'browserback', 'browserforward', 'printscreen', 'capslock', 'scrolllock', # Short forms 'back', 'forward', 'mute' } # Short form mappings for PyAutoGUI keys pyautogui_short_forms = { 'back': 'browserback', 'forward': 'browserforward', 'mute': 'volumemute' } # Valid PyAutoGUI keys (comprehensive list) to check valid keys valid_pyautogui_keys = { '\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace', 'browserback', 'browserfavorites', 'browserforward', 'browserhome', 'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete', 'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute', 'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f2', 'f20', 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja', 'kana', 'kanji', 'launchapp1', 'launchapp2', 'launchmail', 'launchmediaselect', 'left', 'modechange', 'multiply', 'nexttrack', 'nonconvert', 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 'numlock', 'pagedown', 'pageup', 'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack', 'print', 'printscreen', 'prntscrn', 'prtsc', 'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator', 'shift', 'shiftleft', 'shiftright', 'sleep', 'space', 'stop', 'subtract', 'tab', 'up', 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft', 'winright', 'yen', 'command', 'option', 'optionleft', 'optionright', # Short forms 'back', 'forward', 'mute' } try: # ------------------------------------------------------------ # SELENIUM MODE - Check if first arg is WebDriver object # needs: nothing : works on all platforms # ------------------------------------------------------------ if len(keys) > 0 and hasattr(keys[0], 'find_element'): driver_obj = keys[0] # ---------------------------------------------------------- # SELENIUM ELEMENT + KEY (driver, selector_type, selector, key) # ---------------------------------------------------------- if len(keys) >= 4 and keys[1] in ('id', 'name', 'xpath', 'text', 'partial', 'tag', 'class', 'css'): selector_type = keys[1] selector = keys[2] key_name = keys[3].lower() # Check if it's a PyAutoGUI-only key if key_name in pyautogui_only_keys: print(f"Warning: '{key_name}' key not supported in Selenium. Use PyAutoGUI instead.") return False key = key_map.get(key_name, key_name) # Use mapping or raw key # Find element and send key element = _get_web_element(driver_obj, selector_type, selector) if element: element.send_keys(key) return True else: print(f"Element not found with {selector_type}: {selector}") return False # ---------------------------------------------------------- # SELENIUM DRIVER KEYS (driver + keys) # ---------------------------------------------------------- else: action = ActionChains(driver_obj) # Single key on driver: press(driver, "tab") if len(keys) == 2: key_name = keys[1].lower() # Check if it's a PyAutoGUI-only key if key_name in pyautogui_only_keys: print( f"Warning: '{key_name}' key not supported in Selenium. Use PyAutoGUI press('{key_name}') instead.") return False key = key_map.get(key_name, keys[1]) # Use mapping or raw string action.send_keys(key).perform() return True # Key + count: press(driver, "tab", 5) elif len(keys) == 3 and isinstance(keys[2], int): count = keys[2] key_name = keys[1].lower() # Check if it's a PyAutoGUI-only key if key_name in pyautogui_only_keys: print(f"Warning: '{key_name}' key not supported in Selenium.") return False key = key_map.get(key_name, keys[1]) # Positive count: press normally if count >= 0: for _ in range(count): action.send_keys(key).perform() time.sleep(1) # Negative count: press with shift held else: for _ in range(abs(count)): action.key_down(Keys.SHIFT).send_keys(key).key_up(Keys.SHIFT).perform() time.sleep(1) return True # Multiple keys (combination): press(driver, "ctrl", "c") elif len(keys) >= 3: # Check if any key is PyAutoGUI-only for i in range(1, len(keys)): if keys[i].lower() in pyautogui_only_keys: print(f"Warning: '{keys[i]}' key not supported in Selenium.") return False # Press all keys down in order for i in range(1, len(keys)): key_name = keys[i].lower() key = key_map.get(key_name, keys[i]) # If it's a modifier key, hold it down if key in (Keys.CONTROL, Keys.SHIFT, Keys.ALT, Keys.COMMAND): action.key_down(key) else: # Regular key - just send it action.send_keys(key) # Release all modifier keys in reverse order for i in range(len(keys) - 1, 0, -1): key_name = keys[i].lower() key = key_map.get(key_name, keys[i]) if key in (Keys.CONTROL, Keys.SHIFT, Keys.ALT, Keys.COMMAND): action.key_up(key) action.perform() return True # ------------------------------------------------------------ # PYAUTOGUI MODE - No driver object # needs: display # ------------------------------------------------------------ else: if not _GUI_AVAILABLE: print("Error: press() requires a display.") return False # Single key press if len(keys) == 1: key_name = keys[0].lower() # Translate short form to full form actual_key = pyautogui_short_forms.get(key_name, keys[0]) # Validate key if actual_key.lower() not in valid_pyautogui_keys and len(actual_key) > 1: print(f"Error: Invalid key '{keys[0]}'") return False pyautogui.press(actual_key) return True # Key + count (press n times) elif len(keys) == 2 and isinstance(keys[1], int): count = keys[1] key_name = keys[0].lower() # Translate short form to full form actual_key = pyautogui_short_forms.get(key_name, keys[0]) # Validate key if actual_key.lower() not in valid_pyautogui_keys and len(actual_key) > 1: print(f"Error: Invalid key '{keys[0]}'") return False # Positive count: press normally if count >= 0: for _ in range(count): pyautogui.press(actual_key) time.sleep(1) # Negative count: press with shift held else: pyautogui.keyDown('shift') for _ in range(abs(count)): pyautogui.press(actual_key) time.sleep(1) pyautogui.keyUp('shift') return True # Multiple key combination (hotkey) else: # Translate short forms to full forms and validate translated_keys = [] for key in keys: key_name = key.lower() if isinstance(key, str) else key translated_key = pyautogui_short_forms.get(key_name, key) # Validate each key if translated_key.lower() not in valid_pyautogui_keys and len(str(translated_key)) > 1: print(f"Error: Invalid key '{key}' in combination") return False translated_keys.append(translated_key) pyautogui.hotkey(*translated_keys) return True except Exception as e: print(f"Error pressing keys {keys}: {e}") return False
[docs] def read(*args): """ Extract text from screen (using OCR), files (by parsing file format) or a Selenium browser window. Modes: 1. No arguments: OCR full screen 2. 2 integers: OCR from (x, y) to bottom-right corner 3. 4 integers: OCR specific region (x, y, width, height) 4. 1 string: Read file by parsing its format 5. 1 driver object: Take screenshot of Selenium browser and read using OCR Supported file formats: - Documents: PDF, DOCX, PPTX, ODT, RTF - Tabular: CSV, TSV, XLSX, SQLite - Structured: JSON, YAML, XML, INI/CFG - Text: TXT, LOG, MD - Web: HTML - Email: EML, MSG - eBooks: EPUB - Scripts: SH, BAT, PY Args: *args: Variable arguments depending on mode Returns: str: Extracted text or None if error Example: :: # OCR - Read entire screen text = read() # OCR - Read from (100, 200) to bottom-right corner text = read(100, 200) # OCR - Read specific region: x=100, y=200, width=400, height=300 text = read(100, 200, 400, 300) # Selenium - Read text from browser window using OCR d1 = browser('https://example.com') text = read(d1) # File - Read with extension text = read('report.pdf') text = read('data.csv') text = read('script.py') # File - Read without extension (auto-detects) text = read('report') # Finds report.pdf automatically text = read('config') # Finds config.yaml automatically # Check if text on screen if 'login' in read(): print("Login visible!") Note: OCR first run downloads models (~100MB), subsequent runs are fast. """ # ------------------------------------------------------------ # DETERMINE MODE AND VALIDATE ARGUMENTS # ------------------------------------------------------------ is_ocr = False region = None # MODE 1: OCR - No arguments (full screen) if len(args) == 0: is_ocr = True region = None # Full screen # MODE 2: OCR - 2 integers (from x,y to bottom-right corner) elif len(args) == 2 and all(isinstance(arg, int) for arg in args): is_ocr = True x, y = args # Validate coordinates if x < 0 or y < 0: print(f"Error: Coordinates must be non-negative (x={x}, y={y})") return None # Get screen size screen_width, screen_height = pyautogui.size() # Calculate width and height to bottom-right corner width = screen_width - x height = screen_height - y if width <= 0 or height <= 0: print(f"Error: Starting point ({x}, {y}) is outside screen bounds") return None region = (x, y, width, height) # MODE 3: OCR - 4 integers (screen region: x, y, width, height) elif len(args) == 4 and all(isinstance(arg, int) for arg in args): is_ocr = True x, y, width, height = args # Validate region if width <= 0 or height <= 0: print(f"Error: Invalid region dimensions (width={width}, height={height})") return None if x < 0 or y < 0: print(f"Error: Coordinates must be non-negative (x={x}, y={y})") return None region = (x, y, width, height) # ------------------------------------------------------------ # PERFORM OCR (modes 1, 2, 3 : needs display) # ------------------------------------------------------------ if is_ocr: if not _GUI_AVAILABLE: print("Error: read() screen/OCR modes require a display.") return None try: # Get shared OCR reader reader = _get_ocr_reader() # Capture screenshot (region or full screen) if region: image = pyautogui.screenshot(region=region) else: image = pyautogui.screenshot() # Convert PIL image to numpy array image = np.array(image) cleaned = _preprocess_for_ocr(image) # Extract text using EasyOCR results = reader.readtext(cleaned, detail=0) # If no text was found if not results: print(f"OCR: No text found.") return "" # Return blank string (not None) to avoid errors in blank text case # Join all detected text with spaces text = ' '.join(results) return text.strip() except ImportError as e: print(f"Error: Required library not installed - {e}") print("Install with: pip install easyocr opencv-python") return None except Exception as e: print(f"Error during OCR: {e}") return None # ------------------------------------------------------------ # MODE 4: Selenium driver - screenshot browser and OCR # needs: nothing for screenshot : OCR runs on bytes, no display needed # ------------------------------------------------------------ elif len(args) == 1 and hasattr(args[0], 'find_element'): driver_obj = args[0] try: reader = _get_ocr_reader() # Get screenshot as bytes directly without saving to disk png_bytes = driver_obj.get_screenshot_as_png() image = np.array(Image.open(io.BytesIO(png_bytes))) cleaned = _preprocess_for_ocr(image) results = reader.readtext(cleaned, detail=0) if not results: print("OCR: No text found in browser window.") return "" text = ' '.join(results) return text.strip() except Exception as e: print(f"Could not read text from browser window: {e}") return None # ------------------------------------------------------------ # MODE 5: File reading - 1 string (file path) # needs: nothing : safe on all platforms including servers # ------------------------------------------------------------ elif len(args) == 1 and isinstance(args[0], str): file = args[0] # ------------------------------------------------------------ # AUTO-DETECT FILE EXTENSION IF NOT PROVIDED # ------------------------------------------------------------ if not os.path.splitext(file)[1]: # No extension provided # Get directory and filename if os.path.dirname(file): directory = os.path.dirname(file) filename = os.path.basename(file) else: directory = '.' filename = file # Find files matching the name try: matching_files = [f for f in os.listdir(directory) if os.path.splitext(f)[0] == filename] if len(matching_files) == 1: # Found exactly one file - use it file = os.path.join(directory, matching_files[0]) print(f"Auto-detected file: {file}") elif len(matching_files) > 1: print(f"Error: Multiple files found with name '{filename}':") for f in matching_files: print(f" - {f}") print("Please specify file extension.") return None else: print(f"Error: No file found with name '{filename}'") return None except Exception as e: print(f"Error searching for file: {e}") return None # Get file extension _, ext = os.path.splitext(file) ext = ext.lower() try: # ------------------------------------------------------------ # DOCUMENTS # ------------------------------------------------------------ # PDF files if ext == '.pdf': with open(file, 'rb') as pdf_file: pdf_reader = PdfReader(pdf_file) return '\r\n'.join([pdf_reader.pages[i].extract_text() for i in range(len(pdf_reader.pages))]) # Word documents elif ext in ['.docx', '.doc']: doc = Document(file) return '\n'.join([paragraph.text for paragraph in doc.paragraphs]) # PowerPoint presentations elif ext in ['.pptx', '.ppt']: prs = Presentation(file) text = [] for slide_num, slide in enumerate(prs.slides, 1): text.append(f"=== Slide {slide_num} ===") for shape in slide.shapes: if hasattr(shape, "text"): text.append(shape.text) text.append('') return '\n'.join(text) # OpenDocument Text (LibreOffice) elif ext == '.odt': textdoc = odf_load(file) allparas = textdoc.getElementsByType(P) return '\n'.join([teletype.extractText(p) for p in allparas]) # Rich Text Format elif ext == '.rtf': with open(file, 'r', encoding='utf-8', errors='ignore') as f: return rtf_to_text(f.read()) # ------------------------------------------------------------ # TABULAR DATA (with >>>Row_X: format) # ------------------------------------------------------------ # CSV files elif ext == '.csv': with open(file, 'r', encoding='utf-8', errors='ignore') as f: reader = csv.reader(f) rows = list(reader) if not rows: return '' text = [] # First row as header text.append('\t'.join(rows[0])) text.append('-' * 60) # Data rows with >>>Row_X: format for row_num, row in enumerate(rows[1:], 1): row_data = '\t'.join(row) text.append(f">>>Row_{row_num}:\t{row_data}") return '\n'.join(text) # TSV files (Tab-separated values) elif ext == '.tsv': with open(file, 'r', encoding='utf-8', errors='ignore') as f: reader = csv.reader(f, delimiter='\t') rows = list(reader) if not rows: return '' text = [] # First row as header text.append('\t'.join(rows[0])) text.append('-' * 60) # Data rows with >>>Row_X: format for row_num, row in enumerate(rows[1:], 1): row_data = '\t'.join(row) text.append(f">>>Row_{row_num}:\t{row_data}") return '\n'.join(text) # Excel files elif ext in ['.xlsx', '.xlsm']: wb = load_workbook(file, data_only=True) text = [] for sheet in wb.worksheets: text.append(f"\n{'=' * 60}") text.append(f"Sheet: {sheet.title}") text.append('=' * 60) rows = list(sheet.iter_rows(values_only=True)) if rows: # First row as header text.append('\t'.join([str(cell) if cell is not None else '' for cell in rows[0]])) text.append('-' * 60) # Data rows with >>>Row_X: format for row_num, row in enumerate(rows[1:], 1): row_data = '\t'.join([str(cell) if cell is not None else 'NULL' for cell in row]) text.append(f">>>Row_{row_num}:\t{row_data}") text.append('') return '\n'.join(text) # SQLite databases elif ext in ['.db', '.sqlite', '.sqlite3']: conn = sqlite3.connect(file) cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") tables = cursor.fetchall() text = [] for table in tables: table_name = table[0] text.append(f"\n{'=' * 60}") text.append(f"Table: {table_name}") text.append('=' * 60) # Get column names cursor.execute(f"PRAGMA table_info({table_name})") columns = [col[1] for col in cursor.fetchall()] # Add column headers text.append('\t'.join(columns)) text.append('-' * 60) # Get data (limit to 100 rows) cursor.execute(f"SELECT * FROM {table_name} LIMIT 100") rows = cursor.fetchall() # Add rows with >>>Row_X: format for row_num, row in enumerate(rows, 1): row_data = '\t'.join([str(cell) if cell is not None else 'NULL' for cell in row]) text.append(f">>>Row_{row_num}:\t{row_data}") text.append(f"\nTotal_rows: {len(rows)}\n") conn.close() return '\n'.join(text) # ------------------------------------------------------------ # STRUCTURED DATA # ------------------------------------------------------------ # JSON files elif ext == '.json': with open(file, 'r', encoding='utf-8') as f: data = json.load(f) return json.dumps(data, indent=2) # YAML files elif ext in ['.yaml', '.yml']: with open(file, 'r', encoding='utf-8') as f: data = yaml.safe_load(f) return yaml.dump(data, default_flow_style=False, sort_keys=False) # XML files elif ext == '.xml': tree = et.parse(file) return et.tostring(tree.getroot(), encoding='unicode', method='xml') # INI/CFG files elif ext in ['.ini', '.cfg']: config = configparser.ConfigParser() config.read(file) text = [] for section in config.sections(): text.append(f"[{section}]") for key, value in config.items(section): text.append(f"{key} = {value}") text.append('') return '\n'.join(text) # ------------------------------------------------------------ # TEXT FILES # ------------------------------------------------------------ # Plain text, log files, markdown elif ext in ['.txt', '.log', '.md']: with open(file, 'r', encoding='utf-8', errors='ignore') as f: return f.read() # ------------------------------------------------------------ # WEB # ------------------------------------------------------------ # HTML files elif ext in ['.html', '.htm']: with open(file, 'r', encoding='utf-8', errors='ignore') as f: soup = BeautifulSoup(f.read(), 'html.parser') return soup.get_text('\n').strip() # ------------------------------------------------------------ # EMAIL # ------------------------------------------------------------ # EML files (standard email) elif ext == '.eml': with open(file, 'r', encoding='utf-8', errors='ignore') as f: msg = email.message_from_file(f) text = [] text.append(f"From: {msg.get('From')}") text.append(f"To: {msg.get('To')}") text.append(f"Subject: {msg.get('Subject')}") text.append(f"Date: {msg.get('Date')}") text.append("\n--- Body ---") if msg.is_multipart(): for part in msg.walk(): if part.get_content_type() == "text/plain": payload = part.get_payload(decode=True) if payload: text.append(payload.decode('utf-8', errors='ignore')) else: payload = msg.get_payload(decode=True) if payload: text.append(payload.decode('utf-8', errors='ignore')) return '\n'.join(text) # MSG files (Outlook email) elif ext == '.msg': msg = extract_msg.Message(file) text = [] text.append(f">>>From: {msg.sender}") text.append(f">>>To: {msg.to}") text.append(f">>>Subject: {msg.subject}") text.append(f">>>Date: {msg.date}") text.append("\nBody ---") text.append(msg.body) msg.close() return '\n'.join(text) # ------------------------------------------------------------ # EBOOKS # ------------------------------------------------------------ # EPUB files elif ext == '.epub': book = epub.read_epub(file) text = [] for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT): soup = BeautifulSoup(item.get_content(), 'html.parser') text.append(soup.get_text()) return '\n\n'.join(text) # ------------------------------------------------------------ # SCRIPT FILES # ------------------------------------------------------------ # Shell scripts (Linux) elif ext == '.sh': with open(file, 'r', encoding='utf-8', errors='ignore') as f: return f.read() # Batch files (Windows) elif ext == '.bat': with open(file, 'r', encoding='utf-8', errors='ignore') as f: return f.read() # Python scripts elif ext == '.py': with open(file, 'r', encoding='utf-8', errors='ignore') as f: return f.read() # ------------------------------------------------------------ # UNSUPPORTED FORMAT # ------------------------------------------------------------ else: print(f"Unsupported file format: {ext}") print(f"Supported formats: PDF, DOCX, PPTX, ODT, RTF, CSV, TSV, XLSX, SQLite,") print(f" JSON, YAML, XML, INI/CFG, TXT, LOG, MD, HTML,") print(f" EML, MSG, EPUB, SH, BAT, PY") return None except Exception as e: print(f"Error reading file {file}: {e}") return None # ---------------Invalid Arguments--------------- else: print("Error: Invalid arguments for read()") print("Use: read() - OCR full screen") print(" read(x, y) - OCR from (x,y) to bottom-right") print(" read(x, y, width, height) - OCR specific region") print(" read(driver) - OCR browser window") print(" read('file.pdf') - Read file") return None
[docs] def run(target): """ Runs a file or application on Windows and Linux. Args: target: File path or application name to execute - If target is a file path: Opens with default application - If target is an application name: Launches the application - For applications, the command must be available in system PATH Raises: NotImplementedError: If called on macOS Output: - Prints error message if file or application was not found. - Prints error message if permission was denied. Example: :: # Open files with default application run("sample.txt") # Opens in default text editor run("document.pdf") # Opens in default PDF viewer run("C:\\Users\\file.xlsx") # Opens Excel file # Launch applications (Windows) run("calc") # Calculator run("notepad") # Notepad run("mspaint") # Paint # Launch applications (Linux) run("gedit") # Text editor run("firefox") # Browser run("gnome-calculator") # Calculator Note: - Windows: Uses os.startfile for files, subprocess for applications. - Linux: Uses xdg-open for files, direct execution for applications. xdg-utils is required (included in Linux dependencies). """ # Check if macOS and reject if sys.platform.startswith('darwin') or platform.system() == "Darwin": raise NotImplementedError( "run() is not supported on macOS.\n" "Supported platforms: Windows, Linux\n" "Use subprocess.Popen() directly for macOS-specific needs." ) try: # ------------------------------------------------------------ # WINDOWS # ------------------------------------------------------------ if sys.platform.startswith('win'): if os.path.isfile(target): # File exists - open with default application # Using os.startfile is more reliable than shell=True os.startfile(target) else: # Assume it's an application name # Use shell=True to allow PATH resolution subprocess.Popen(target, shell=True) # ------------------------------------------------------------ # LINUX # ------------------------------------------------------------ elif sys.platform.startswith('linux'): if os.path.isfile(target): # File exists - open with default application using xdg-open item_path = os.path.abspath(target) subprocess.Popen(['xdg-open', item_path]) else: # Assume it's an application name - try to execute directly subprocess.Popen([target]) else: # Unknown platform raise NotImplementedError( f"run() is not supported on platform: {sys.platform}\n" "Supported platforms: Windows (win32), Linux (linux)" ) except FileNotFoundError: print(f"Error: Application or file not found: '{target}'") print("For applications, ensure the command is in your system PATH.") except PermissionError: print(f"Error: Permission denied to run: '{target}'") print("Check file permissions or try running with appropriate privileges.") except Exception as e: print(f"Error running '{target}': {e}")
[docs] def say(text, volume=1.0): """ Speak text using offline Text-to-Speech via Piper TTS. Args: text: Text to speak volume: Volume level 0.0 to 1.0 (default: 1.0) Returns: None Example: :: say("Hello, how are you?") say("Download complete.") say("Error occurred, please try again.", volume=0.7) say("Warning: Low battery.", volume=0.5) Note: - Automatically logs spoken text when log_setup() is active. - Requires: pip install piper-tts huggingface_hub - Linux requires: sudo apt install espeak-ng alsa-utils - Model files are saved in: Windows → %LOCALAPPDATA%\\autocore\\piper_models\\ Linux → ~/.local/share/autocore/piper_models/ - Model size is approximately 60MB, downloaded once and reused. - Browse all voices at: https://huggingface.co/rhasspy/piper-voices """ # ------------------------------------------------------------ # GUARD — needs audio system # _AUDIO_AVAILABLE is set at file level during library import. # if audio init failed (headless server, no sound drivers etc.) # we bail out early instead of crashing deep inside piper # ------------------------------------------------------------ if not _AUDIO_AVAILABLE: print("Error: say() requires an audio system.") return # ensure caller passed a string — piper will crash on anything else if not isinstance(text, str): raise TypeError("Text to speak must be a string") # piper's SynthesisConfig will silently clamp or error outside this range if not (0.0 <= volume <= 1.0): raise ValueError("Volume must be between 0.0 and 1.0") try: # ------------------------------------------------------------ # LAZY IMPORTS # kept here intentionally and NOT moved to the top of the file # because: # - piper and huggingface_hub are optional heavy dependencies # if not installed, the rest of the library still works fine # - winsound is Windows-only, importing at file level would # crash on Linux # ------------------------------------------------------------ from piper.voice import PiperVoice, SynthesisConfig # ------------------------------------------------------------ # MODEL PATHS # models are stored in a user-writable location on each platform # so we never hit permission errors writing into Program Files # or other system-protected directories # # Windows → C:\Users\YourUsername\AppData\Local\autocore\piper_models\ # Linux → /home/yourusername/.local/share/autocore/piper_models/ # # os.environ["LOCALAPPDATA"] on Windows always points to the # current user's AppData\Local folder which is always writable # without admin rights, unlike C:\Program Files\ # ------------------------------------------------------------ model_name = "en_US-libritts_r-medium.onnx" model_json = "en_US-libritts_r-medium.onnx.json" if platform.system() == "Windows": model_dir = Path(os.environ["LOCALAPPDATA"]) / "autocore" / "piper_models" else: model_dir = Path.home() / ".local" / "share" / "autocore" / "piper_models" model_path = model_dir / model_name # full path to the .onnx model model_json_path = model_dir / model_json # full path to the .json config # ------------------------------------------------------------ # VALIDATION HELPERS # defined as inner functions so they can access model_dir # from the enclosing scope without passing it as an argument # ------------------------------------------------------------ def _is_valid_onnx(path): # valid onnx must exist AND be at least 50MB # we know the model is ~60MB so anything under 50MB # means the download was interrupted or corrupted p = Path(path) return p.exists() and p.stat().st_size > 50 * 1024 * 1024 # 50MB in bytes def _is_valid_json(path): # valid json config must exist, be non-empty, and parse # without errors — a corrupt json would crash piper at # load time so we catch it here and re-download instead p = Path(path) if not p.exists() or p.stat().st_size == 0: return False try: with open(p, 'r', encoding='utf-8') as f: json.load(f) # json already imported at file level return True except Exception: return False def _download_file(filename, is_onnx=False): # downloads one file from HuggingFace into model_dir # hf_hub_download saves into HuggingFace's own cache first # (~/.cache/huggingface/) then we move it to piper_models/ # retries up to 3 times for network hiccups or incomplete downloads # returns the final Path if successful, None if all retries failed try: from huggingface_hub import hf_hub_download # lazy — optional dependency except ImportError: print("Error: huggingface_hub not installed. Run: pip install huggingface_hub") return None max_retries = 3 for attempt in range(1, max_retries + 1): try: # suppress HuggingFace's own progress/warning output # so our print messages stay clean and readable with warnings.catch_warnings(): warnings.simplefilter("ignore") cached_path = hf_hub_download( repo_id="rhasspy/piper-voices", filename=f"en/en_US/libritts_r/medium/{filename}", # path inside the HuggingFace repo local_dir=str(model_dir), ) # hf_hub_download may save into a subdirectory inside model_dir # move it to model_dir root so our fixed model_path always finds it final_path = model_dir / filename if os.path.abspath(cached_path) != os.path.abspath(final_path): shutil.move(cached_path, final_path) # move from hf cache subdir to model_dir root # validate the downloaded file before declaring success # size check for onnx, json parse check for config file valid = _is_valid_onnx(final_path) if is_onnx else _is_valid_json(final_path) if valid: return final_path # download succeeded and file is healthy # file exists but failed validation — delete it so # the next attempt starts a completely fresh download print(f"Attempt {attempt}/{max_retries}: {filename} appears incomplete, retrying...") if final_path.exists(): final_path.unlink() # delete corrupt/incomplete file if Path(cached_path).exists(): Path(cached_path).unlink() # also clean up the hf cache copy except Exception as e: print(f"Attempt {attempt}/{max_retries} failed: {e}") final_path = model_dir / filename if final_path.exists(): final_path.unlink() # clean up any partial download if attempt < max_retries: print("Retrying in 3 seconds...") time.sleep(3) # brief pause before retry to let network recover print(f"Error: Failed to download {filename} after {max_retries} attempts.") return None # ------------------------------------------------------------ # DOWNLOAD MODEL IF MISSING OR INCOMPLETE # both files are checked independently — if only the json is # missing or corrupt we skip re-downloading the 60MB onnx and # just fetch the small config file again # ------------------------------------------------------------ onnx_valid = _is_valid_onnx(model_path) json_valid = _is_valid_json(model_json_path) if not onnx_valid or not json_valid: print("Piper voice model not found or incomplete. Downloading...") model_dir.mkdir(parents=True, exist_ok=True) # create piper_models/ if it doesn't exist yet if not onnx_valid: if model_path.exists(): print(f"Removing incomplete {model_name}...") model_path.unlink() # delete the corrupt file before re-downloading print(f"Downloading {model_name} (~60MB)...") if _download_file(model_name, is_onnx=True) is None: return # download failed after all retries, abort print(f"{model_name} ready.") if not json_valid: if model_json_path.exists(): print(f"Removing incomplete {model_json}...") model_json_path.unlink() # delete the corrupt config before re-downloading print(f"Downloading {model_json}...") if _download_file(model_json, is_onnx=False) is None: return # download failed after all retries, abort print(f"{model_json} ready.") print("Piper model ready.") # ------------------------------------------------------------ # SYNTHESIZE SPEECH # PiperVoice.load() reads the .onnx model into memory # SynthesisConfig carries volume into piper so it scales # the audio during synthesis — no manual audio work needed # synthesize_wav() runs the neural network and writes raw # PCM audio into the wav file handle # ------------------------------------------------------------ voice = PiperVoice.load(str(model_path)) # load model from piper_models/ syn_config = SynthesisConfig(volume=volume) # package volume setting for piper # delete=False because the file must persist after the 'with' # block closes so aplay/winsound can open and play it — # we manually delete it ourselves after playback is done with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: tmp_path = tmp.name # grab the path before the handle closes with wave.open(tmp_path, "wb") as wav_file: voice.synthesize_wav(text, wav_file, syn_config=syn_config) # neural network runs here # ------------------------------------------------------------ # PLAY AUDIO — platform specific # each OS has its own built-in audio playback tool: # Linux — aplay (part of alsa-utils, -q means quiet/no output) # Windows — winsound (Python standard library, Windows only) # ------------------------------------------------------------ if platform.system() == "Linux": os.system(f"aplay -q {tmp_path}") elif platform.system() == "Windows": import winsound # lazy — Windows only winsound.PlaySound(tmp_path, winsound.SND_FILENAME) # temp WAV is deleted immediately after playback — # only the model files in piper_models/ are kept permanently if os.path.exists(tmp_path): os.remove(tmp_path) except ImportError: print("Error: piper-tts not installed. Run: pip install piper-tts") return except Exception as e: print(f"Error using piper TTS: {e}") return # integrates with log_setup() if active — # provides an audit trail of everything spoken during a session print(f"Spoken: {text}")
[docs] def screenshot(*args): """ Takes a screenshot and saves it to the current working directory. Modes: 1. Full screen, auto-named: () 2. Full screen, custom name: (filename) 3. From (x,y) to screen edge, auto-named: (x, y) 4. From (x,y) to screen edge, custom name: (x, y, filename) 5. Specific region, auto-named: (x, y, width, height) 6. Specific region, custom name: (x, y, width, height, filename) 7. Selenium variants of all above: (driver, ...) Args: *args: Variable arguments depending on usage :: - () : Full screen, auto-named - (filename) : Full screen, custom filename - (x, y) : From (x,y) to screen edge, auto-named - (x, y, filename) : From (x,y) to screen edge, custom filename - (x, y, width, height) : Specific region, auto-named - (x, y, width, height, filename) : Specific region, custom filename - (driver, ...) : Same as above but captures from Selenium browser Where: - driver: Selenium WebDriver instance - x, y: Top-left corner coordinates of the screenshot region - width, height: Dimensions of the screenshot region - filename: Custom name to save the screenshot - .png extension is added automatically if not provided - If not provided, auto-generates: screenshot_YYYY-MM-DD_HH-MM-SS_<unix>.png Example: screenshot_2025-02-18_14-30-45_1708268445.png Returns: True if successful, False otherwise Output: - Prints the full path of the saved screenshot on success. - Prints error message if invalid arguments or coordinates are provided. Example: :: # Full screen (PyAutoGUI) screenshot() # Full screen, auto-named screenshot('desktop.png') # Full screen, custom name # Selenium full page screenshot(driver) # Selenium full page, auto-named screenshot(driver, 'webpage.png') # Selenium full page, custom name # From top-left point to edge (PyAutoGUI) screenshot(100, 200) # From (100,200) to edge, auto-named screenshot(100, 200, 'crop.png') # From (100,200) to edge, custom name # From top-left point to edge (Selenium) screenshot(driver, 100, 200) # Selenium from (100,200) to edge screenshot(driver, 100, 200, 'page.png') # Selenium, custom name # Specific region (PyAutoGUI) screenshot(0, 0, 500, 300) # Region: top-left (0,0), 500x300, auto-named screenshot(0, 0, 500, 300, 'region.png') # Region: top-left (0,0), 500x300, custom name # Specific region (Selenium) screenshot(driver, 0, 0, 800, 600) # Selenium region, auto-named screenshot(driver, 0, 0, 800, 600, 'sel.png') # Selenium region, custom name """ try: # ------------------------------------------------------------ # DETERMINE MODE : Selenium or PyAutoGUI # ------------------------------------------------------------ if len(args) > 0 and hasattr(args[0], 'save_screenshot'): # First argument is a WebDriver object use_driver = True driver_obj = args[0] remaining_args = args[1:] else: # PyAutoGUI mode : guard display check here if not _GUI_AVAILABLE: print("Error: screenshot() requires a display.") return False use_driver = False driver_obj = None remaining_args = args # Default values x, y, width, height, filename = None, None, None, None, None # ------------------------------------------------------------ # PARSE ARGUMENTS # ------------------------------------------------------------ if len(remaining_args) == 0: # screenshot() or screenshot(driver) # Full screen, auto-named x, y, width, height, filename = 0, 0, None, None, None elif len(remaining_args) == 1: # screenshot('file.png') or screenshot(driver, 'file.png') # Full screen, custom name x, y, width, height = 0, 0, None, None filename = remaining_args[0] elif len(remaining_args) == 2: # screenshot(100, 200) or screenshot(driver, 100, 200) # From (x,y) to screen edge, auto-named x, y = remaining_args[0], remaining_args[1] width, height, filename = None, None, None elif len(remaining_args) == 3: # screenshot(100, 200, 'file.png') or screenshot(driver, 100, 200, 'file.png') # From (x,y) to screen edge, custom name x, y, filename = remaining_args[0], remaining_args[1], remaining_args[2] width, height = None, None elif len(remaining_args) == 4: # screenshot(0, 0, 500, 300) or screenshot(driver, 0, 0, 500, 300) # Specific region, auto-named x, y, width, height = remaining_args[0], remaining_args[1], remaining_args[2], remaining_args[3] filename = None elif len(remaining_args) == 5: # screenshot(0, 0, 500, 300, 'file.png') or screenshot(driver, 0, 0, 500, 300, 'file.png') # Specific region, custom name x, y, width, height, filename = remaining_args else: raise ValueError("Too many arguments") # Convert coordinates to integers x = int(x) if x is not None else 0 y = int(y) if y is not None else 0 # ------------------------------------------------------------ # GET SCREEN DIMENSIONS # ------------------------------------------------------------ if use_driver: screen_width = driver_obj.execute_script("return window.innerWidth") screen_height = driver_obj.execute_script("return window.innerHeight") else: screen_width, screen_height = pyautogui.size() # Calculate width and height if not provided if width is None: width = screen_width - x # From x to right edge if height is None: height = screen_height - y # From y to bottom edge # Convert to integers width, height = int(width), int(height) # Validate if width <= 0 or height <= 0: raise ValueError("Width and height must be positive") if x < 0 or y < 0: raise ValueError("Coordinates must be non-negative") # ------------------------------------------------------------ # GENERATE FILENAME # ------------------------------------------------------------ if filename is None: now = datetime.datetime.now() timestamp = now.strftime("%Y-%m-%d_%H-%M-%S") unix_time = int(now.timestamp()) filename = f"screenshot_{timestamp}_{unix_time}.png" else: # Add .png extension if missing if not os.path.splitext(filename)[1]: filename += '.png' # Get full path full_path = os.path.join(os.getcwd(), filename) # ------------------------------------------------------------ # TAKE SCREENSHOT # ------------------------------------------------------------ if use_driver: if x == 0 and y == 0 and width == screen_width and height == screen_height: # Full browser window, no cropping needed driver_obj.save_screenshot(full_path) else: # Region crop requested png_bytes = driver_obj.get_screenshot_as_png() image = Image.open(io.BytesIO(png_bytes)) img_width, img_height = image.size crop_x2 = min(x + width, img_width) crop_y2 = min(y + height, img_height) cropped = image.crop((x, y, crop_x2, crop_y2)) cropped.save(full_path) else: # PyAutoGUI: capture region directly img = pyautogui.screenshot(region=(x, y, width, height)) img.save(full_path) print(f"Screenshot saved: {full_path}") return True except ValueError as e: print(f"Invalid input: {e}") return False except RuntimeError as e: print(f"Error: {e}") return False except Exception as e: print(f"Error taking screenshot: {e}") return False
[docs] def scroll(*args, timeout=30): """ Universal scroll function for both PyAutoGUI and Selenium. Args: *args: Variable arguments (see examples below) timeout: Max seconds when scrolling to 'bottom'/'top' (default: 30) Returns: True if successful, False otherwise Output: - Prints scroll direction and count on completion. - Prints progress every 10 scrolls for large scroll counts. - Prints confirmation when bottom or top is reached (Selenium). Example: :: # PyAutoGUI Examples (scroll any window): scroll() # Scroll down 1 time (default) scroll(5) # Scroll down 5 times scroll('down') # Scroll down 1 time scroll('down', 10) # Scroll down 10 times scroll('up', 5) # Scroll up 5 times scroll('bottom') # Scroll down continuously for 30 seconds scroll('bottom', timeout=60) # Scroll down continuously for 60 seconds scroll('top') # Scroll up continuously for 30 seconds # Selenium Examples (pass driver object): scroll(driver) # Scroll down 1 time in browser scroll(driver, 5) # Scroll down 5 times in browser scroll(driver, 'down') # Scroll down 1 time in browser scroll(driver, 'down', 10) # Scroll down 10 times in browser scroll(driver, 'up', 5) # Scroll up 5 times in browser scroll(driver, 'bottom') # Scroll to bottom (auto-detect end) scroll(driver, 'top') # Scroll to top (auto-detect end) scroll(driver, 'bottom', timeout=120) # Scroll to bottom, max 2 minutes scroll(driver, 'Login') # Scroll to 1st instance of 'Login' scroll(driver, 'Login', 2) # Scroll to 2nd instance of 'Login' scroll(driver, 'Login', -1) # Scroll to last instance of 'Login' scroll(driver, 'Login', -2) # Scroll to 2nd last instance of 'Login' """ wait = 3 # Fixed wait time between scrolls (3 seconds) # --------------- PARSE ARGUMENTS --------------- if len(args) == 0: # scroll() - default: scroll down 1 time with PyAutoGUI use_selenium = False driver_obj = None direction = 'down' count = 1 text_mode = False elif len(args) > 0 and hasattr(args[0], 'execute_script'): # Selenium mode - first arg is WebDriver object use_selenium = True driver_obj = args[0] text_mode = False if len(args) == 1: # scroll(driver) - scroll down 1 time direction = 'down' count = 1 elif isinstance(args[1], int): # scroll(driver, 5) - scroll down 5 times direction = 'down' count = args[1] elif args[1] in ['down', 'up', 'bottom', 'top']: # scroll(driver, 'down') or scroll(driver, 'down', 10) direction = args[1] count = args[2] if len(args) > 2 else 1 elif isinstance(args[1], str): # scroll(driver, 'Login') or scroll(driver, 'Login', 2) or scroll(driver, 'Login', -1) text_mode = True search_text = args[1] index = args[2] if len(args) > 2 else 1 else: print(f"Error: Invalid argument '{args[1]}'. Use 'down', 'up', 'bottom', 'top' or a number.") return False else: # PyAutoGUI mode - guard display check here if not _GUI_AVAILABLE: print("Error: scroll() requires a display.") return False use_selenium = False driver_obj = None text_mode = False if isinstance(args[0], int): # scroll(5) - scroll down 5 times direction = 'down' count = args[0] elif args[0] in ['down', 'up', 'bottom', 'top']: # scroll('down') or scroll('down', 10) direction = args[0] count = args[1] if len(args) > 1 else 1 else: print(f"Error: Invalid argument '{args[0]}'. Use 'down', 'up', 'bottom', 'top' or a number.") return False # Also guard the default scroll() case (no args = PyAutoGUI) if not use_selenium and not _GUI_AVAILABLE: print("Error: scroll() requires a display.") return False # Validate direction (only when not text mode) if not text_mode and direction not in ['down', 'up', 'bottom', 'top']: print(f"Error: Invalid direction '{direction}'.") return False try: # --------------- SELENIUM MODE --------------- # needs: nothing : works on all platforms if use_selenium: # --------------- SCROLL TO TEXT INSTANCE --------------- if text_mode: # Reset selection and scroll to top so find always starts from beginning driver_obj.execute_script("window.scrollTo(0, 0);") driver_obj.execute_script("window.getSelection().removeAllRanges();") # Count total instances using TreeWalker (text nodes only, no DOM corruption) total = driver_obj.execute_script(""" var text = arguments[0].toLowerCase(); var count = 0; var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null, false); while (walker.nextNode()) { var nodeText = walker.currentNode.nodeValue.toLowerCase(); var pos = 0; while ((pos = nodeText.indexOf(text, pos)) !== -1) { count++; pos += text.length; } } return count; """, search_text) if total == 0: print(f"Error: No instances of '{search_text}' found.") return False # Resolve index (1-based positive, negative from end) if index == 0: print(f"Error: index=0 is invalid. Use 1 to {total} or -1 to -{total}.") return False elif index < 0: if abs(index) > total: print(f"Error: index={index} out of range. Found {total} instance(s) of '{search_text}'.") return False resolved_index = total + index + 1 else: if index > total: print(f"Error: index={index} out of range. Found {total} instance(s) of '{search_text}'.") return False resolved_index = index # Use window.find() resolved_index times to land on correct instance # window.find() works like Ctrl+F - highlights just the word and scrolls to it driver_obj.execute_script(""" var text = arguments[0]; var count = arguments[1]; for (var i = 0; i < count; i++) { window.find(text, false, false, true, false, false, false); } """, search_text, resolved_index) print(f"[Selenium] Scrolled to '{search_text}' (instance {resolved_index} of {total})") return True # --------------- SCROLL TO BOTTOM --------------- elif direction == 'bottom': print(f"[Selenium] Scrolling to bottom (timeout={timeout}s)...") start_time = time.time() scrolls = 0 while time.time() - start_time < timeout: driver_obj.execute_script("window.scrollBy(0, 1000);") scrolls += 1 time.sleep(1) new_height = driver_obj.execute_script("return document.documentElement.scrollHeight") scroll_position = driver_obj.execute_script("return window.pageYOffset + window.innerHeight") # Check if scroll position reached the bottom if scroll_position >= new_height: print(f"Reached bottom after {scrolls} scrolls ({time.time() - start_time:.1f}s)") return True if scrolls % 10 == 0: print(f"Scrolled {scrolls} times ({time.time() - start_time:.1f}s)") print(f"Scrolled {scrolls} times") return True # --------------- SCROLL TO TOP --------------- elif direction == 'top': print(f"[Selenium] Scrolling to top...") driver_obj.execute_script("window.scrollTo(0, 0);") time.sleep(wait) print("Scrolled to top") return True # --------------- SCROLL DOWN / UP N TIMES --------------- else: scroll_pixels = 500 if direction == 'down' else -500 print(f"[Selenium] Scrolling {direction} {count} time(s)...") for i in range(count): driver_obj.execute_script(f"window.scrollBy(0, {scroll_pixels});") time.sleep(wait) if count > 10 and (i + 1) % 10 == 0: print(f"Scrolled {i + 1}/{count} times") print(f"Scrolled {direction} {count} time(s)") return True # --------------- PYAUTOGUI MODE --------------- # needs: display (already guarded above) else: # --------------- SCROLL TO BOTTOM / TOP --------------- if direction in ['bottom', 'top']: scroll_amount = -500 if direction == 'bottom' else 500 print(f"[PyAutoGUI] Scrolling {direction} continuously for {timeout}s...") start_time = time.time() scrolls = 0 while time.time() - start_time < timeout: pyautogui.scroll(scroll_amount) scrolls += 1 time.sleep(wait) if scrolls % 10 == 0: print(f"Scrolled {scrolls} times ({time.time() - start_time:.1f}s)") print(f"Scrolled {direction} for {timeout}s ({scrolls} total scrolls)") return True # --------------- SCROLL DOWN / UP N TIMES --------------- else: scroll_amount = -500 if direction == 'down' else 500 print(f"[PyAutoGUI] Scrolling {direction} {count} time(s)...") for i in range(count): pyautogui.scroll(scroll_amount) time.sleep(wait) if count > 10 and (i + 1) % 10 == 0: print(f"Scrolled {i + 1}/{count} times") print(f"Scrolled {direction} {count} time(s)") return True except Exception as e: print(f"Error while scrolling: {e}") return False
[docs] def second(): """ Get current second. Returns: int: Current second (0-59) Example: second() # 45 """ return time.localtime().tm_sec
[docs] def wait(*args, countdown=True): """ Wait with countdown, wait for element or wait for color at pixel. Args: *args: Variable arguments (see examples) countdown: If True, shows countdown display (default: True) Returns: True if successful, False if error or timeout Example: :: # Countdown wait wait(5) # Wait 5 seconds with countdown wait(10, countdown=False) # Wait 10 seconds silently wait() # Wait 3 seconds (default) # Wait for element (Selenium) - pass driver object wait(driver, 'xpath', '//button') # Wait max 180s with countdown wait(driver, 'id', 'submit-btn', 10) # Wait max 10s with countdown wait(driver, 'class', 'content', 30, countdown=False) # Wait silently for 30s # Wait for color at pixel wait(100, 200, 255, 0, 0) # Wait for red (255,0,0) at (100,200) with countdown wait(100, 200, 255, 0, 0, 30) # Wait for red, max 30s with countdown wait(500, 300, 0, 255, 0, 60, countdown=False) # Wait silently """ # if no argument is passed if len(args) == 0: args = (3,) # Set default to 3 seconds # ------------------------------------------------------------ # MODE 1 : COUNTDOWN (1 argument, integer or float) # needs: nothing : safe on all platforms # ------------------------------------------------------------ if len(args) == 1 and isinstance(args[0], (int, float)): seconds = args[0] if seconds < 0: raise ValueError("Seconds must be non-negative") if seconds == 0: return True # No wait needed # Convert to int for countdown seconds_int = int(seconds) if countdown: print('Waiting:') for remaining in range(seconds_int, 0, -1): print(f'..{remaining} ', end='\r', flush=True) time.sleep(1) print('Done ') else: time.sleep(seconds) return True # ------------------------------------------------------------ # MODE 2 : WAIT FOR ELEMENT (driver object + 2 or 3 arguments) # needs: nothing : works on all platforms # ------------------------------------------------------------ elif len(args) >= 3 and hasattr(args[0], 'find_element'): # Selenium mode - driver object detected driver_obj = args[0] selector_type = args[1] selector = args[2] timeout = args[3] if len(args) == 4 else 180 # Default 180s (3 minutes) # Validate selector type if selector_type not in ['id', 'xpath', 'class', 'name', 'css', 'tag', 'text', 'partial']: raise ValueError(f"Invalid selector type: {selector_type}") if countdown: print(f'Waiting for element ({selector_type}: {selector}):') # Save original implicit wait and set to 0 for fast checking original_implicit_wait = driver_obj.timeouts.implicit_wait driver_obj.implicitly_wait(0) start_time = time.time() # Countdown approach: check element every second, print countdown for remaining in range(timeout, 0, -1): # Check if element exists using get_web_element element = _get_web_element(driver_obj, selector_type, selector) # Element found if element: elapsed = time.time() - start_time # Restore original implicit wait driver_obj.implicitly_wait(original_implicit_wait) print(f"Element found after {elapsed:.1f}s") return True # Print countdown if countdown: print(f'..{remaining} ', end='\r', flush=True) # Wait 1 second before next check time.sleep(1) # Restore original implicit wait driver_obj.implicitly_wait(original_implicit_wait) # Timeout reached print(f"Element not found after {timeout}s timeout.") return False # ------------------------------------------------------------------ # MODE 3 : WAIT FOR COLOR AT PIXEL (5 or 6 arguments, all integers) # needs: display # ------------------------------------------------------------------ elif len(args) in [5, 6] and all(isinstance(arg, int) for arg in args): if not _GUI_AVAILABLE: print("Error: wait() color mode requires a display.") return False x = args[0] y = args[1] target_r = args[2] target_g = args[3] target_b = args[4] timeout = args[5] if len(args) == 6 else 180 # Default 180s if countdown: print(f'Waiting at ({x},{y}) for color RGB({target_r},{target_g},{target_b}) :') start_time = time.time() # Countdown approach: check color every second, print countdown for remaining in range(timeout, 0, -1): try: # Check if color matches at pixel if pyautogui.pixelMatchesColor(x, y, (target_r, target_g, target_b)): elapsed = time.time() - start_time print(f"Color found after {elapsed:.1f}s") return True except Exception as e: raise RuntimeError(f"Error checking pixel color: {e}") # Print countdown if countdown: print(f'..{remaining} ', end='\r', flush=True) # Wait 1 second before next check time.sleep(1) # Timeout reached print(f"Color not found after {timeout}s timeout.") return False # ---------------Invalid Arguments--------------- else: raise ValueError("Invalid arguments for wait()")
[docs] def wait_download(timeout=1200, url=None, filename=None, download_dir=None): """ Wait for a browser-initiated download to complete or download a file directly via URL. Modes: 1. URL mode (url provided): Downloads file directly using requests. Useful for file types blocked by browsers (.exe, .msix, .msi, etc.). File is saved to Python's current working directory. 2. Monitor mode (url not provided): Monitors the downloads folder for a browser-initiated download to complete. Args: timeout: Maximum seconds to wait for download completion (default: 1200) url: Direct download URL (optional):: - If provided: Downloads file directly via requests - If None: Monitors downloads folder for browser-initiated download filename: Custom filename to save/rename the downloaded file (optional):: - With extension (e.g. "myapp.exe") : used as-is - Without extension (e.g. "myapp") : extension borrowed from original file - If None: Original filename is kept - If multiple files are downloaded, only the first completed file is renamed download_dir: Custom download directory to monitor (monitor mode only, optional):: - If provided: Uses specified path and skips all auto-detection - If None: Auto-detects using the priority order described in Note below Returns:: str: Full path of the downloaded file (always includes extension) on success False: On failure (download error, timeout, directory access issue, etc.) Output: - Prints download progress every 10 seconds showing elapsed time and file size. - Prints confirmation with final filename and saved path on completion. - Prints timeout message if download does not complete in time. Example: :: wait_download() # Monitor downloads folder wait_download(url='https://abc.com/file.msix') # Direct download via URL wait_download(url='https://abc.com/file.msix', filename='myapp') # Custom name, borrows extension wait_download(300, filename='our_log.txt') # Monitor and rename on completion wait_download(600, download_dir='/downloads') # Docker with custom path wait_download(300, download_dir='D:/MyDownloads') # Windows custom path # Use with browser() : pass driver.download_dir to guarantee alignment driver = browser('https://example.com') click(driver, 'id', 'download-button') wait_download(download_dir=driver.download_dir) Note: - When download_dir is not provided, the folder is auto-detected in this order: 1. DOWNLOAD_DIR environment variable (if set at OS level) 2. /downloads folder (if running inside Docker) 3. ~/Downloads (default fallback) - If a file was modified within the last 20 seconds before calling this function, it will be detected as a recently completed download and returned immediately. This handles cases where downloads complete very quickly before monitoring starts. """ def _resolve_final_filename(custom_name, original_name): """Resolve the final filename, borrowing extension from original if custom name has none.""" if custom_name is None: return original_name original_ext = os.path.splitext(original_name)[1] custom_base, custom_ext = os.path.splitext(custom_name) return custom_name if custom_ext else custom_base + original_ext def _rename_if_needed(directory, original_name, final_name): """Rename the downloaded file if a custom filename was provided.""" if final_name != original_name: os.rename( os.path.join(directory, original_name), os.path.join(directory, final_name) ) print(f'"{original_name}" renamed to "{final_name}"') def _format_size(bytes_count): """Convert bytes to a human-readable MB string.""" return f"{bytes_count / (1024 * 1024):.1f} MB" # ------------------------------------------------------------ # MODE 1 : Direct download via requests # ------------------------------------------------------------ if url is not None: try: original_filename = url.split('/')[-1].split('?')[0] final_filename = _resolve_final_filename(filename, original_filename) print(f"Downloading: {final_filename}") response = requests.get(url, stream=True, timeout=30) # 30s for connection/chunk timeout response.raise_for_status() deadline = time.time() + (timeout) # enforce total download time limit download_start = time.time() last_print_time = 0 total_bytes = 0 with open(final_filename, 'wb') as f: for chunk in response.iter_content(chunk_size=8192): if time.time() > deadline: print(f"Timeout after {timeout} seconds while downloading '{final_filename}'") return False f.write(chunk) total_bytes += len(chunk) elapsed_sec = int(time.time() - download_start) # Print progress every 10 seconds if elapsed_sec - last_print_time >= 10: elapsed = str(datetime.timedelta(seconds=elapsed_sec)) print(f"Downloading... {elapsed} elapsed ({_format_size(total_bytes)})") last_print_time = elapsed_sec print(f"Downloaded file saved at: {os.path.abspath(final_filename)}") return os.path.abspath(final_filename) except Exception as e: print(f"Could not download file: {e}") return False # ------------------------------------------------------------ # MODE 2 : Monitor browser downloads folder # ------------------------------------------------------------ # Determine download directory using the following priority order: # 1. User provided path via download_dir argument # 2. DOWNLOAD_DIR environment variable (if set) # 3. /downloads folder (if running inside Docker) # 4. ~/Downloads folder (default, if none of the above apply) if download_dir is not None: download_dir = str(download_dir) elif os.getenv('DOWNLOAD_DIR'): download_dir = os.getenv('DOWNLOAD_DIR') print(f'Using DOWNLOAD_DIR from environment: {download_dir}') elif os.path.exists('/.dockerenv'): download_dir = '/downloads' print(f'Docker detected, using: {download_dir}') else: download_dir = str(Path.home() / "Downloads") # Ensure the download directory exists if not os.path.exists(download_dir): print(f"Warning: Download directory does not exist: {download_dir}") print(f"Attempting to create it...") try: os.makedirs(download_dir, exist_ok=True) print(f"Created directory: {download_dir}") except Exception as e: print(f"Error: Could not create directory: {e}") return False print(f'Monitoring downloads in: {download_dir}') # Temporary file extensions used by different browsers while downloading temp_extensions = ('.crdownload', '.part', '.download', '.tmp', '.temp') # Snapshot the initial state of the downloads folder before monitoring begins try: initial_files = set(os.listdir(download_dir)) initial_temp_files = set([f for f in initial_files if f.endswith(temp_extensions)]) # Record sizes of any temp files that already exist, to detect if they are actively downloading initial_temp_sizes = {} for f in initial_temp_files: try: initial_temp_sizes[f] = os.path.getsize(os.path.join(download_dir, f)) except Exception as e: print(f"Could not get file size of {f}: {e}") except Exception as e: print(f"Error accessing download directory: {e}") return False # Wait 10 seconds before first check to allow download to start print("Checking for active downloads...") time.sleep(10) # Check if a file was already downloaded quickly (modified within last 20 seconds) try: current_files_after_wait = set(os.listdir(download_dir)) current_time = time.time() recent_files = [] for f in current_files_after_wait: if not f.endswith(temp_extensions): try: mtime = os.path.getmtime(os.path.join(download_dir, f)) age = current_time - mtime if age <= 20: recent_files.append((f, age)) except Exception as e: print(f"Could not get modification time of {f}: {e}") if recent_files: recent_files.sort(key=lambda x: x[1]) # sort by age, most recent first most_recent_file, age = recent_files[0] final_filename = _resolve_final_filename(filename, most_recent_file) _rename_if_needed(download_dir, most_recent_file, final_filename) print(f"Quick download detected: '{final_filename}' (modified {int(age)} seconds ago)") print(f"Downloaded file saved at: {os.path.join(download_dir, final_filename)}") return os.path.join(download_dir, final_filename) except Exception as e: print(f"Error checking for recent files: {e}") download_time = 10 # already waited 10 seconds above last_print_time = 0 download_started = False monitoring_files = set() completed_files_so_far = [] # tracks files already reported as completed mid-session filename_used = False # ensures filename rename only applies to first completed file while download_time < timeout: try: current_files = set(os.listdir(download_dir)) current_temp_files = set([f for f in current_files if f.endswith(temp_extensions)]) # Get current sizes of all active temp files current_temp_sizes = {} for f in current_temp_files: try: current_temp_sizes[f] = os.path.getsize(os.path.join(download_dir, f)) except Exception as e: print(f"Could not get file size of {f}: {e}") new_temp_files = current_temp_files - initial_temp_files # On the first pass (10s mark), check if any pre-existing temp files are actively growing if download_time == 10: active_old_temp_files = set() for f in initial_temp_files: if f in current_temp_sizes and f in initial_temp_sizes: if current_temp_sizes[f] != initial_temp_sizes[f]: active_old_temp_files.add(f) monitoring_files.add(f) if active_old_temp_files: download_started = True if len(active_old_temp_files) == 1: print(f"Active download detected: '{list(active_old_temp_files)[0]}'") else: print( f"{len(active_old_temp_files)} active downloads detected: {', '.join(active_old_temp_files)}") # Track any brand new temp files that appeared after monitoring started if new_temp_files: truly_new = new_temp_files - monitoring_files if truly_new: if not download_started: download_started = True monitoring_files.update(truly_new) if len(truly_new) == 1: print(f"New download started: '{list(truly_new)[0]}'") else: print(f"{len(truly_new)} new downloads started: {', '.join(truly_new)}") # Check for any newly completed files : catches mid-session completions including # small files that complete too quickly for temp file tracking to catch newly_completed = [ f for f in (current_files - initial_files) if not f.endswith(temp_extensions) and f not in completed_files_so_far ] for f in newly_completed: elapsed = str(datetime.timedelta(seconds=download_time)) if not filename_used: final_name = _resolve_final_filename(filename, f) _rename_if_needed(download_dir, f, final_name) filename_used = True else: final_name = f if filename is not None: print(f'"{f}" kept original name (filename only applies to first file)') print(f"Download completed: '{final_name}' (took {elapsed})") completed_files_so_far.append(final_name) all_monitoring = (current_temp_files & monitoring_files) | new_temp_files # Return immediately if a file just completed and no temp files are still active if newly_completed and not (current_temp_files & monitoring_files): primary = completed_files_so_far[0] print(f"Downloaded file saved at: {os.path.join(download_dir, primary)}") return os.path.join(download_dir, primary) if all_monitoring: # Still downloading : print progress every 10 seconds with size of each file if download_time - last_print_time >= 10: elapsed = str(datetime.timedelta(seconds=download_time)) if len(all_monitoring) == 1: f = list(all_monitoring)[0] size_str = _format_size(current_temp_sizes.get(f, 0)) print(f"Downloading... {elapsed} elapsed ({size_str})") else: size_parts = ', '.join( f"{f}: {_format_size(current_temp_sizes.get(f, 0))}" for f in all_monitoring ) print(f"Downloading... {elapsed} elapsed ({size_parts})") last_print_time = download_time elif download_started: # All temp files are gone : find any remaining unreported completed files new_files = current_files - initial_files remaining_completed = [ f for f in new_files if not f.endswith(temp_extensions) and f not in completed_files_so_far ] if remaining_completed: elapsed = str(datetime.timedelta(seconds=download_time)) for f in remaining_completed: if not filename_used: final_name = _resolve_final_filename(filename, f) _rename_if_needed(download_dir, f, final_name) filename_used = True else: final_name = f if filename is not None: print(f'"{f}" kept original name (filename only applies to first file)') print(f"Download completed: '{final_name}' (took {elapsed})") completed_files_so_far.append(final_name) # Return the first completed file as the primary result primary = completed_files_so_far[0] print(f"Downloaded file saved at: {os.path.join(download_dir, primary)}") return os.path.join(download_dir, primary) else: # Temp files gone but no completed file found yet : keep waiting briefly if download_time - last_print_time >= 10: elapsed = str(datetime.timedelta(seconds=download_time)) print(f"Verifying download... (elapsed: {elapsed})") last_print_time = download_time else: # No download detected yet : no size to report, just print elapsed time if download_time - last_print_time >= 10: recent_complete_files = [ f for f in current_files if not f.endswith(temp_extensions) and f not in initial_files ] if recent_complete_files: original_name = recent_complete_files[0] final_filename = _resolve_final_filename(filename, original_name) _rename_if_needed(download_dir, original_name, final_filename) print(f"Quick download detected: '{final_filename}'") print(f"Downloaded file saved at: {os.path.join(download_dir, final_filename)}") return os.path.join(download_dir, final_filename) elapsed = str(datetime.timedelta(seconds=download_time)) print(f"Waiting for download to start... {elapsed} elapsed") last_print_time = download_time except Exception as e: print(f'Error monitoring downloads: {e}') return False time.sleep(1) download_time += 1 # Report any files that completed mid-session before the timeout if completed_files_so_far: if len(completed_files_so_far) == 1: print(f"Note: 1 file completed during session: '{list(completed_files_so_far)[0]}'") else: print(f"Note: {len(completed_files_so_far)} files completed during session:") for f in completed_files_so_far: print(f" - {f}") # Timeout reached : report what was still in progress if anything if monitoring_files or download_started: current_files = set(os.listdir(download_dir)) current_temp_files = set([f for f in current_files if f.endswith(temp_extensions)]) still_downloading = current_temp_files & monitoring_files if still_downloading: if len(still_downloading) == 1: print(f"Timeout after {timeout} seconds while waiting for '{list(still_downloading)[0]}' to complete.") else: print( f"Timeout after {timeout} seconds while waiting for {len(still_downloading)} files: {', '.join(still_downloading)}") else: print(f'Timeout after {timeout} seconds. Download status unclear.') else: print(f'Timeout after {timeout} seconds. No download detected.') return False
[docs] def window(action=None, target=None, *args): """ Unified window management function for Windows and Linux. Args: action: Window operation to perform (default: 'list'):: 'list' : Get all window titles 'title' : Get active window title (or find full title if target provided) 'focus' : Bring window to foreground 'close' : Close window 'minimize' : Minimize window 'maximize' : Maximize window 'resize' : Resize window (requires width, height) 'move' : Move window (requires x, y) target: Window title or pattern (required for most actions) *args: Additional parameters (width, height for resize; x, y for move) Returns: Return type depends on action:: list/None : List of strings when action is None or 'list' title : String or None others : True if successful, False otherwise Raises: ValueError: If invalid action, missing required parameters or invalid dimensions/coordinates NotImplementedError: If called on macOS Output: - Prints error if window not found, with suggestions for similar window titles (focus action only). - Prints error if wmctrl or xdotool is not installed (Linux only). Example: :: # Get all windows (default) window() # ['Chrome', 'Notepad', 'Excel'] window('list') # ['Chrome', 'Notepad', 'Excel'] # Check if window exists if 'Chrome' in window(): print("Chrome is open!") # Get active window title window('title') # 'Google Chrome - New Tab' # Find full title containing text window('title', 'Chrome') # 'Google Chrome - New Tab' window('title', 'Note') # 'Untitled - Notepad' # Window operations window('focus', 'Chrome') # Focus window window('close', 'Notepad') # Close window window('minimize', 'Excel') # Minimize window window('maximize', 'Word') # Maximize window # Position and size window('resize', 'Chrome', 800, 600) # Resize to 800x600 window('move', 'Chrome', 100, 200) # Move to (100, 200) # Side-by-side setup (1920x1080 display) window('move', 'Chrome', 0, 0) # Left side window('resize', 'Chrome', 960, 1080) # Half screen width window('move', 'Code', 960, 0) # Right side window('resize', 'Code', 960, 1080) # Half screen width # Recording setup window('resize', 'Demo', 1280, 720) # 720p size window('move', 'Demo', 320, 180) # Centered on 1920x1080 Note: - On Linux, resize and move automatically remove maximized/minimized state before applying changes, ensuring consistent behavior. - Target matching is case-insensitive and partial (e.g. 'Chrome' matches 'Google Chrome - New Tab'). """ system = platform.system() # Check if macOS and reject if system == "Darwin": raise NotImplementedError( "window() is not supported on macOS.\n" "Supported platforms: Windows, Linux\n" "Note: Selenium functions will still work on macOS." ) # Default action: list all windows if action is None: action = 'list' action = action.lower() # Validate action valid_actions = ['list', 'title', 'focus', 'close', 'minimize', 'maximize', 'resize', 'move'] if action not in valid_actions: raise ValueError(f"Invalid action '{action}'. Valid actions: {', '.join(valid_actions)}") # Check if target is required if action in ['focus', 'close', 'minimize', 'maximize', 'resize', 'move'] and target is None: raise ValueError(f"Action '{action}' requires a target window") try: # ------------------------------------------------------------ # LIST - Get all window titles # ------------------------------------------------------------ if action == 'list': if system == "Windows": titles = [] def enum_handler(hwnd, ctx): if win32gui.IsWindowVisible(hwnd): title = win32gui.GetWindowText(hwnd) if title.strip(): titles.append(title) win32gui.EnumWindows(enum_handler, None) return titles elif system == "Linux": result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True) if result.returncode == 0: titles = [] for line in result.stdout.strip().split('\n'): parts = line.split(None, 3) if len(parts) >= 4: titles.append(parts[3]) return titles else: print("Error: wmctrl not installed.") print("Install with: sudo apt-get install wmctrl") return [] # ------------------------------------------------------------ # TITLE - Get active window title OR find window title # ------------------------------------------------------------ elif action == 'title': # No target - get active window title if target is None: if system == "Windows": title = win32gui.GetWindowText(win32gui.GetForegroundWindow()).strip() return title if title else None elif system == "Linux": result = subprocess.run(['xdotool', 'getactivewindow', 'getwindowname'], capture_output=True, text=True) if result.returncode == 0: return result.stdout.strip() else: print("Error: xdotool not installed.") print("Install with: sudo apt-get install xdotool") return None # Target provided - find full title containing target text else: all_windows = window('list') # Case-insensitive contains match for win_title in all_windows: if target.lower() in win_title.lower(): return win_title # Not found return None # ------------------------------------------------------------ # FOCUS - Bring window to foreground # ------------------------------------------------------------ elif action == 'focus': all_windows = window('list') matches = [w for w in all_windows if target.lower() in w.lower()] if not matches: print(f"Window not found: {target}") # Suggest similar windows suggestions = get_close_matches(target, all_windows, n=3, cutoff=0.6) if suggestions: print(f"Did you mean: {', '.join(suggestions)}") return False window_title = matches[0] if system == "Windows": def find_window_hwnd(title): hwnd_list = [] def enum_handler(hwnd, ctx): if win32gui.IsWindowVisible(hwnd): win_text = win32gui.GetWindowText(hwnd) if win_text == title: hwnd_list.append(hwnd) win32gui.EnumWindows(enum_handler, None) return hwnd_list[0] if hwnd_list else None hwnd = find_window_hwnd(window_title) if hwnd: win32gui.ShowWindow(hwnd, win32con.SW_RESTORE) win32gui.SetForegroundWindow(hwnd) return True return False elif system == "Linux": result = subprocess.run(['wmctrl', '-a', window_title], capture_output=True, text=True) return result.returncode == 0 # ------------------------------------------------------------ # CLOSE - Close window # ------------------------------------------------------------ elif action == 'close': all_windows = window('list') matches = [w for w in all_windows if target.lower() in w.lower()] if not matches: print(f"Window not found: {target}") return False window_title = matches[0] if system == "Windows": def find_window_hwnd(title): hwnd_list = [] def enum_handler(hwnd, ctx): if win32gui.IsWindowVisible(hwnd): win_text = win32gui.GetWindowText(hwnd) if win_text == title: hwnd_list.append(hwnd) win32gui.EnumWindows(enum_handler, None) return hwnd_list[0] if hwnd_list else None hwnd = find_window_hwnd(window_title) if hwnd: win32gui.PostMessage(hwnd, win32con.WM_CLOSE, 0, 0) return True return False elif system == "Linux": result = subprocess.run(['wmctrl', '-c', window_title], capture_output=True, text=True) return result.returncode == 0 # ------------------------------------------------------------ # MINIMIZE - Minimize window # ------------------------------------------------------------ elif action == 'minimize': all_windows = window('list') matches = [w for w in all_windows if target.lower() in w.lower()] if not matches: print(f"Window not found: {target}") return False window_title = matches[0] if system == "Windows": def find_window_hwnd(title): hwnd_list = [] def enum_handler(hwnd, ctx): if win32gui.IsWindowVisible(hwnd): win_text = win32gui.GetWindowText(hwnd) if win_text == title: hwnd_list.append(hwnd) win32gui.EnumWindows(enum_handler, None) return hwnd_list[0] if hwnd_list else None hwnd = find_window_hwnd(window_title) if hwnd: win32gui.ShowWindow(hwnd, win32con.SW_MINIMIZE) return True return False elif system == "Linux": result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True) if result.returncode == 0: for line in result.stdout.strip().split('\n'): if window_title in line: window_id = line.split()[0] subprocess.run(['xdotool', 'windowminimize', window_id]) return True return False # ------------------------------------------------------------ # MAXIMIZE - Maximize window # ------------------------------------------------------------ elif action == 'maximize': if not window('focus', target): return False time.sleep(0.5) all_windows = window('list') matches = [w for w in all_windows if target.lower() in w.lower()] if not matches: print(f"Window not found: {target}") return False window_title = matches[0] if system == "Windows": def find_window_hwnd(title): hwnd_list = [] def enum_handler(hwnd, ctx): if win32gui.IsWindowVisible(hwnd): win_text = win32gui.GetWindowText(hwnd) if win_text == title: hwnd_list.append(hwnd) win32gui.EnumWindows(enum_handler, None) return hwnd_list[0] if hwnd_list else None hwnd = find_window_hwnd(window_title) if hwnd: win32gui.ShowWindow(hwnd, win32con.SW_MAXIMIZE) return True return False elif system == "Linux": result = subprocess.run(['wmctrl', '-r', window_title, '-b', 'add,maximized_vert,maximized_horz'], capture_output=True, text=True) return result.returncode == 0 # ------------------------------------------------------------ # RESIZE - Resize window to specific dimensions # ------------------------------------------------------------ elif action == 'resize': if len(args) < 2: raise ValueError("Action 'resize' requires width and height: window('resize', 'Chrome', 800, 600)") try: width = int(args[0]) height = int(args[1]) except (ValueError, TypeError): raise ValueError("Width and height must be integers") if width <= 0 or height <= 0: raise ValueError("Width and height must be positive") if not window('focus', target): return False time.sleep(0.5) all_windows = window('list') matches = [w for w in all_windows if target.lower() in w.lower()] if not matches: print(f"Window not found: {target}") return False window_title = matches[0] if system == "Windows": def find_window_hwnd(title): hwnd_list = [] def enum_handler(hwnd, ctx): if win32gui.IsWindowVisible(hwnd): win_text = win32gui.GetWindowText(hwnd) if win_text == title: hwnd_list.append(hwnd) win32gui.EnumWindows(enum_handler, None) return hwnd_list[0] if hwnd_list else None hwnd = find_window_hwnd(window_title) if hwnd: rect = win32gui.GetWindowRect(hwnd) x, y = rect[0], rect[1] win32gui.MoveWindow(hwnd, x, y, width, height, True) return True return False elif system == "Linux": result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True) if result.returncode == 0: for line in result.stdout.strip().split('\n'): if window_title in line: window_id = line.split()[0] # Remove maximized/minimized state before resizing subprocess.run( ['wmctrl', '-i', '-r', window_id, '-b', 'remove,maximized_vert,maximized_horz']) subprocess.run(['xdotool', 'windowmap', window_id]) # restore if minimized time.sleep(0.3) subprocess.run(['wmctrl', '-i', '-r', window_id, '-e', f'0,-1,-1,{width},{height}']) return True return False # ------------------------------------------------------------ # MOVE - Move window to specific coordinates # ------------------------------------------------------------ elif action == 'move': if len(args) < 2: raise ValueError("Action 'move' requires x and y coordinates: window('move', 'Chrome', 100, 200)") try: x = int(args[0]) y = int(args[1]) except (ValueError, TypeError): raise ValueError("X and Y coordinates must be integers") if x < 0 or y < 0: raise ValueError("X and Y coordinates must be non-negative") if not window('focus', target): return False time.sleep(0.5) all_windows = window('list') matches = [w for w in all_windows if target.lower() in w.lower()] if not matches: print(f"Window not found: {target}") return False window_title = matches[0] if system == "Windows": def find_window_hwnd(title): hwnd_list = [] def enum_handler(hwnd, ctx): if win32gui.IsWindowVisible(hwnd): win_text = win32gui.GetWindowText(hwnd) if win_text == title: hwnd_list.append(hwnd) win32gui.EnumWindows(enum_handler, None) return hwnd_list[0] if hwnd_list else None hwnd = find_window_hwnd(window_title) if hwnd: rect = win32gui.GetWindowRect(hwnd) width = rect[2] - rect[0] height = rect[3] - rect[1] win32gui.MoveWindow(hwnd, x, y, width, height, True) return True return False elif system == "Linux": result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True) if result.returncode == 0: for line in result.stdout.strip().split('\n'): if window_title in line: window_id = line.split()[0] # Remove maximized/minimized state before moving subprocess.run( ['wmctrl', '-i', '-r', window_id, '-b', 'remove,maximized_vert,maximized_horz']) subprocess.run(['xdotool', 'windowmap', window_id]) # restore if minimized time.sleep(0.3) subprocess.run(['wmctrl', '-i', '-r', window_id, '-e', f'0,{x},{y},-1,-1']) return True return False except Exception as e: print(f"Error in window operation: {e}") return False if action in ['focus', 'close', 'minimize', 'maximize', 'resize', 'move'] else None
[docs] def write(*keys): """ Write or type text using keyboard (PyAutoGUI or Selenium). Args: *keys: Variable arguments depending on mode Returns: True if successful, False otherwise Output: - Prints error message if element was not found (Selenium). - Prints error message if invalid arguments are provided. Example: :: # PyAutoGUI mode (types in any active window) write("Hello World") # Types in active window write("user@example.com") # Types email write("12345") # Types numbers as string # Selenium mode - type on active element in browser write(driver, "Hello World") # Types on active element write(driver, "Search query") # Types in focused input # Selenium mode - type in specific element write(driver, "id", "username", "john_doe") write(driver, "xpath", "//input[@name='email']", "user@example.com") write(driver, "class", "search-box", "Python tutorial") Note: - PyAutoGUI uses typewrite() which types one character at a time. - Selenium uses send_keys() which types the entire string at once. """ try: # ------------------------------------------------------------ # MODE 1 : PYAUTOGUI (1 argument - text only) # needs: display # ------------------------------------------------------------ if len(keys) == 1: if not _GUI_AVAILABLE: print("Error: write() requires a display.") return False pyautogui.typewrite(keys[0]) time.sleep(1) return True # ------------------------------------------------------------ # SELENIUM MODE (driver object detected) # needs: nothing : works on all platforms # ------------------------------------------------------------ elif len(keys) >= 2 and hasattr(keys[0], 'find_element'): driver_obj = keys[0] # ---------------------------------------------------------- # MODE 2 : Type on page (driver, text) # ---------------------------------------------------------- if len(keys) == 2: # write(driver, "Hello World") text = keys[1] action = ActionChains(driver_obj) action.send_keys(text).perform() return True # ---------------------------------------------------------- # MODE 3 : Type in specific element (driver, selector_type, selector, text) # ---------------------------------------------------------- elif len(keys) == 4: # write(driver, "id", "username", "john") selector_type = keys[1] selector = keys[2] text = keys[3] element = _get_web_element(driver_obj, selector_type, selector) if element: element.send_keys(text) return True else: print(f"Element not found: {selector_type} - {selector}") return False else: print("Error: Invalid number of arguments for Selenium write()") print("Use: write(driver, text) or write(driver, selector_type, selector, text)") return False # ---------------Invalid Arguments--------------- else: print("Error: Invalid arguments for write()") print("Use: write(text) or write(driver, text) or write(driver, selector_type, selector, text)") return False except Exception as e: print(f"Error writing text: {e}") return False
[docs] def year(): """ Get current year. Returns: int: Current year (e.g., 2026) Example: year() # 2026 """ return time.localtime().tm_year
[docs] def zoom(*args): """ Zoom in/out using steps or set zoom percentage. Modes: 1. PyAutoGUI steps/reset: (value) 2. Selenium steps: (driver, value) where value is -9 to +9 3. Selenium percentage: (driver, value) where value is outside -9 to +9 4. Selenium reset: (driver, 100) or (driver, 0) Args: *args: Variable arguments:: - (value): PyAutoGUI zoom steps/reset - (driver, value): Selenium zoom steps/percentage/reset Returns: True if successful, False otherwise Raises: ValueError: If arguments are invalid, zoom value is not an integer, PyAutoGUI value is outside -9 to +9 (except 100), or Selenium percentage is less than 1%. Output: - Prints zoom direction and step count (PyAutoGUI). - Prints new zoom percentage after change (Selenium). - Prints confirmation when zoom is reset. Value Logic: - -9 to +9: Zoom steps (Ctrl+/Ctrl-) - 100 or 0: Reset to default/100% - Outside range (except 100): Percentage (Selenium only) Example: :: # PyAutoGUI (desktop apps) zoom(3) # Zoom in 3 steps zoom(-5) # Zoom out 5 steps zoom(100) or zoom(0) # Reset to default (Ctrl+0) When zoom in/out is performed using UI (Ctrl and +/-) in Chrome, the min %, zoom states % and max % follow this order: (25, 33, 50, 67, 75, 80, 90, 100, 110, 125, 150, 175, 200, 250, 300, 400, 500) # Selenium (browser) - Steps zoom(driver, 3) # Zoom in current + 3 * 10% zoom(driver, -5) # Zoom out current - 5 * 10% # Selenium (browser) - Reset zoom(driver, 100) # Reset to 100% zoom(driver, 0) # Reset to 100% # Selenium (browser) - Percentage zoom(driver, 150) # Set zoom to 150% zoom(driver, 75) # Set zoom to 75% zoom(driver, 50) # Set zoom to 50% zoom(driver, 200) # Set zoom to 200% Note: - Selenium zoom is applied via JavaScript and is not reflected in the Chrome URL bar or the kebab menu zoom indicator. - PyAutoGUI reset (0 or 100) uses Ctrl+0 which resets to the application's default zoom, which may not always be 100% (e.g. a PDF viewer may default to 'fit to page' instead). - Selenium reset explicitly sets zoom to exactly 100%. """ # ------------------------------------------------------------ # DETERMINE MODE # ------------------------------------------------------------ if len(args) == 1: # PyAutoGUI mode use_driver = False driver_obj = None value = args[0] elif len(args) == 2 and hasattr(args[0], 'execute_script'): # Selenium mode - driver object detected use_driver = True driver_obj = args[0] value = args[1] else: raise ValueError("Invalid arguments. Use zoom(value) or zoom(driver, value)") # Validate value if not isinstance(value, int): raise ValueError("Zoom value must be an integer") # PyAutoGUI restrictions if not use_driver and value != 100 and abs(value) > 9: raise ValueError("PyAutoGUI mode supports steps (-9 to +9) or reset (100) or (0)") try: # ------------------------------------------------------------ # SELENIUM MODE # needs: nothing : works on all platforms # ------------------------------------------------------------ if use_driver: # Special case: 100 or 0 = Reset if value == 100 or value == 0: driver_obj.execute_script("document.body.style.zoom='100%'") # "reset to 100%" here because Selenium explicitly sets zoom to 100% # unlike PyAutoGUI's Ctrl+0 which resets to the default zoom level # of whatever application is currently active print("Zoom reset to 100%") return True # Steps: -9 to +9 elif -9 <= value <= 9: # Get current zoom level current_zoom = driver_obj.execute_script("return document.body.style.zoom || '100%'") current_zoom = int(current_zoom.replace('%', '')) if current_zoom else 100 if value > 0: new_zoom = current_zoom + (value * 10) # each step = 10% driver_obj.execute_script(f"document.body.style.zoom='{new_zoom}%'") print(f"Zoomed in {value} step(s) - zoom: {new_zoom}%") elif value < 0: steps = abs(value) new_zoom = current_zoom - (steps * 10) # each step = 10% new_zoom = max(10, new_zoom) # minimum 10% zoom driver_obj.execute_script(f"document.body.style.zoom='{new_zoom}%'") print(f"Zoomed out {steps} step(s) - zoom: {new_zoom}%") return True # Percentage: Outside -9 to +9 (excluding 100) else: if value < 1: raise ValueError("Zoom percentage must be at least 1%") driver_obj.execute_script(f"document.body.style.zoom='{value}%'") print(f"Zoom set to {value}%") return True # ------------------------------------------------------------ # PYAUTOGUI MODE (desktop apps) # needs: display # ------------------------------------------------------------ else: if not _GUI_AVAILABLE: print("Error: zoom() requires a display.") return False # Special case: 100 or 0 = Reset if value == 100 or value == 0: pyautogui.hotkey('ctrl', '0') # "reset to default" not "reset to 100%" because Ctrl+0 resets to # the application's default zoom, which may not always be 100% # e.g. a PDF viewer may default to "fit to page" instead of 100% print("Zoom reset to default.") return True # Steps: -9 to +9 elif value > 0: # Zoom in for _ in range(value): pyautogui.hotkey('ctrl', '+') time.sleep(0.3) print(f"Zoomed in {value} step(s)") elif value < 0: # Zoom out steps = abs(value) for _ in range(steps): pyautogui.hotkey('ctrl', '-') time.sleep(0.3) print(f"Zoomed out {steps} step(s)") return True except Exception as e: print(f"Error zooming: {e}") return False