From f43d0677e11f9277c0c5af6232e954d66683d081 Mon Sep 17 00:00:00 2001 From: Junyi Hou Date: Mon, 2 Dec 2024 14:21:13 +0800 Subject: [PATCH] chore: update code comments and add pre-commit --- .flake8 | 3 ++ .pre-commit-config.yaml | 48 ++++++++++++++++++++++++++++ .ruff.toml | 32 +++++++++++++++++++ src/configs.py | 12 ++++--- src/manager.py | 70 ++++++++++++++++++++++++++++++++--------- 5 files changed, 146 insertions(+), 19 deletions(-) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml create mode 100644 .ruff.toml diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..09db884 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +ignore = E501, E203 +max-line-length = 200 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b771785 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,48 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort + args: [--profile, black] + + # Using this mirror lets us use mypyc-compiled black, which is about 2x faster + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.4.2 + hooks: + - id: + black + # It is recommended to specify the latest version of Python + # supported by your project here, or alternatively use + # pre-commit's default_language_version, see + # https://pre-commit.com/#top_level-default_language_version + language_version: python3.12 + args: ["--line-length", "200", "--exclude", "migrations/"] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.285 + hooks: + - id: ruff + alias: autoformat + args: [--fix] + + - repo: https://github.com/pycqa/flake8 + rev: 7.1.0 + hooks: + - id: flake8 + exclude: ^tests/(data|examples)/ + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.10.1 + hooks: + - id: mypy + args: [--ignore-missing-imports, 
--no-namespace-packages] + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..189a6ae --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,32 @@ +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +line-length = 200 +target-version = "py312" diff --git a/src/configs.py b/src/configs.py index 802f9be..4965bfd 100644 --- a/src/configs.py +++ b/src/configs.py @@ -75,9 +75,13 @@ LANGUAGES = [ # regex, have_many_results, result_too_lang REGEX_LIST = [ - (re.compile(r"sk-proj-[A-Za-z0-9-_]{74}T3BlbkFJ[A-Za-z0-9-_]{73}A"), True, True), # Named Project API Key (no matter normal or restricted) - (re.compile(r"sk-proj-[A-Za-z0-9-_]{58}T3BlbkFJ[A-Za-z0-9-_]{58}"), True, True), # Old Project API Key - (re.compile(r"sk-svcacct-[A-Za-z0-9-_]+T3BlbkFJ[A-Za-z0-9-_]+"), False, False), # Service Account Key + # Named Project API Key (no matter normal or restricted) still valid until Dec 2, 2024 + (re.compile(r"sk-proj-[A-Za-z0-9-_]{74}T3BlbkFJ[A-Za-z0-9-_]{73}A"), True, True), + # Old Project API Key + (re.compile(r"sk-proj-[A-Za-z0-9-_]{58}T3BlbkFJ[A-Za-z0-9-_]{58}"), True, True), + # Service Account Key + (re.compile(r"sk-svcacct-[A-Za-z0-9-_]+T3BlbkFJ[A-Za-z0-9-_]+"), False, False), (re.compile(r"sk-proj-[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}"), True, False), - (re.compile(r"sk-[a-zA-Z0-9]{48}"), True, False), # Deprecated by OpenAI + # Old key format (deprecated by OpenAI) + (re.compile(r"sk-[a-zA-Z0-9]{48}"), True, 
False), ] diff --git a/src/manager.py b/src/manager.py index c5e06c1..a89585d 100644 --- a/src/manager.py +++ b/src/manager.py @@ -1,16 +1,20 @@ """ -This module is used to manage the progress and the cookies. +Progress and Cookie Management Module -It includes the following classes: -- ProgressManager: to manage the progress -- CookieManager: to manage the cookies -- DatabaseManager: to manage the database +This module provides functionality for managing application progress, cookies, +and database operations. + +Classes: + ProgressManager: Handles progress tracking and persistence + CookieManager: Manages browser cookie operations + DatabaseManager: Handles database interactions """ + import logging import os -import sys import pickle import sqlite3 +import sys import time from datetime import date from sqlite3 import Connection, Cursor @@ -18,12 +22,31 @@ from sqlite3 import Connection, Cursor from selenium.common.exceptions import UnableToSetCookieException from selenium.webdriver.common.by import By -FORMAT = "%(message)s" -logging.basicConfig(level=logging.INFO, format=FORMAT, datefmt="[%X]") -log = logging.getLogger("ChatGPT-API-Leakage") +LOGGER_NAME = "ChatGPT-API-Leakage" +LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" +logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt="[%X]") +logger = logging.getLogger(LOGGER_NAME) + + +class ProgressManagerError(Exception): + """Custom exception for ProgressManager class errors""" + + def __init__(self, message): + super().__init__(message) class ProgressManager: + """ + Manages and persists progress information for long-running operations. 
+ + Attributes: + progress_file (str): Path to the progress file + + Methods: + save: Saves current progress + load: Loads saved progress + """ + def __init__(self, progress_file=".progress.txt"): self.progress_file = progress_file @@ -32,6 +55,23 @@ class ProgressManager: file.write(f"{from_iter}/{total}/{time.time()}") def load(self, total: int) -> int: + """ + Loads the previously saved progress if available and valid. + + Args: + total (int): The total number of iterations expected in the current process + + Returns: + int: The iteration number to continue from: + - Returns the last saved iteration if: + - Progress file exists + - Last save was within the last hour (3600 seconds) + - Saved total matches current total + - User confirms to continue + - Returns 0 otherwise (fresh start) + + The progress file format is: "{last_iteration}/{total_iterations}/{timestamp}" + """ if not os.path.exists(self.progress_file): return 0 @@ -55,7 +95,7 @@ class CookieManager: cookies = self.driver.get_cookies() with open("cookies.pkl", "wb") as file: pickle.dump(cookies, file) - log.info("🍪 Cookies saved") + logger.info("🍪 Cookies saved") def load(self): try: @@ -65,23 +105,23 @@ class CookieManager: try: self.driver.add_cookie(cookie) except UnableToSetCookieException: - log.debug("🟡 Warning, unable to set a cookie %s", cookie) + logger.debug("🟡 Warning, unable to set a cookie %s", cookie) except (EOFError, pickle.UnpicklingError): if os.path.exists("cookies.pkl"): os.remove("cookies.pkl") - log.error("🔴 Error, unable to load cookies, invalid cookies has been removed, please restart.") + logger.error("🔴 Error, unable to load cookies, invalid cookies has been removed, please restart.") def verify_user_login(self): """ Test if the user is really logged in """ - log.info("🤗 Redirecting ...") + logger.info("🤗 Redirecting ...") self.driver.get("https://github.com/") if self.driver.find_elements(by=By.XPATH, value="//*[contains(text(), 'Sign in')]"): if os.path.exists("cookies.pkl"): 
os.remove("cookies.pkl") - log.error("🔴 Error, you are not logged in, please restart and try again.") + logger.error("🔴 Error, you are not logged in, please restart and try again.") sys.exit(1) return True @@ -90,6 +130,7 @@ class DatabaseManager: """ This class is used to manage the database. """ + def __init__(self, db_filename: str): self.db_filename = db_filename self.con: Connection | None = None @@ -113,7 +154,6 @@ class DatabaseManager: if self.con: self.con.close() - def all_keys(self) -> list: if self.cur is None: raise ValueError("Cursor is not initialized")