mirror of
https://github.com/Junyi-99/ChatGPT-API-Leakage.git
synced 2026-06-02 06:03:51 +02:00
update regex
This commit is contained in:
Vendored
+6
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"black-formatter.args": [
|
||||||
|
"--line-length",
|
||||||
|
"200"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -5,7 +5,6 @@ import pickle
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from sqlite3 import Connection, Cursor
|
|
||||||
|
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.common.exceptions import UnableToSetCookieException
|
from selenium.common.exceptions import UnableToSetCookieException
|
||||||
@@ -14,16 +13,7 @@ from selenium.webdriver.support import expected_conditions as EC
|
|||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from utils import (
|
from utils import check_key, db_close, db_delete, db_get_all_keys, db_insert, db_key_exists, db_open, db_remove_duplication
|
||||||
check_key,
|
|
||||||
db_close,
|
|
||||||
db_delete,
|
|
||||||
db_get_all_keys,
|
|
||||||
db_insert,
|
|
||||||
db_key_exists,
|
|
||||||
db_open,
|
|
||||||
db_remove_duplication,
|
|
||||||
)
|
|
||||||
|
|
||||||
FORMAT = "%(message)s"
|
FORMAT = "%(message)s"
|
||||||
logging.basicConfig(level=logging.INFO, format=FORMAT, datefmt="[%X]")
|
logging.basicConfig(level=logging.INFO, format=FORMAT, datefmt="[%X]")
|
||||||
@@ -33,13 +23,15 @@ log = logging.getLogger("ChatGPT-API-Leakage")
|
|||||||
class APIKeyLeakageScanner:
|
class APIKeyLeakageScanner:
|
||||||
def __init__(self, db_file: str, keywords: list, languages: list):
|
def __init__(self, db_file: str, keywords: list, languages: list):
|
||||||
self.db_file = db_file
|
self.db_file = db_file
|
||||||
|
|
||||||
log.info(f"📂 Opening database file {self.db_file}")
|
log.info(f"📂 Opening database file {self.db_file}")
|
||||||
self.con, self.cur = db_open(self.db_file)
|
self.con, self.cur = db_open(self.db_file)
|
||||||
|
|
||||||
self.keywords = keywords
|
self.keywords = keywords
|
||||||
self.languages = languages
|
self.languages = languages
|
||||||
self.candidate_urls = [
|
self.candidate_urls = [
|
||||||
f"https://github.com/search?q={keyword}+AND+%28%2Fsk-%5Ba-zA-Z0-9%5D%7B48%7D%2F%29+language%3A{language}&type=code&ref=advsearch"
|
# f"https://github.com/search?q={keyword}+AND+%28%2Fsk-%5Ba-zA-Z0-9%5D%7B48%7D%2F%29+language%3A{language}&type=code&ref=advsearch"
|
||||||
|
f"https://github.com/search?q={keyword}+AND+%28%2Fsk-proj-%5BA-Za-z0-9%5D%7B20%7DT3BlbkFJ%5BA-Za-z0-9%5D%7B20%7D%2F%29+language%3A{language}&type=code&ref=advsearch"
|
||||||
for language in self.languages
|
for language in self.languages
|
||||||
for keyword in self.keywords
|
for keyword in self.keywords
|
||||||
]
|
]
|
||||||
@@ -59,18 +51,9 @@ class APIKeyLeakageScanner:
|
|||||||
self.driver.add_cookie(cookie)
|
self.driver.add_cookie(cookie)
|
||||||
except UnableToSetCookieException as e:
|
except UnableToSetCookieException as e:
|
||||||
log.debug(f"🟡 Warning, unable to set a cookie {cookie}")
|
log.debug(f"🟡 Warning, unable to set a cookie {cookie}")
|
||||||
except EOFError as e:
|
except (EOFError, pickle.UnpicklingError):
|
||||||
if os.path.exists("cookies.pkl"):
|
os.remove("cookies.pkl") if os.path.exists("cookies.pkl") else None
|
||||||
os.remove("cookies.pkl")
|
log.error("🔴 Error, unable to load cookies, invalid cookies has been removed, please restart.")
|
||||||
log.error(
|
|
||||||
"🔴 Error, unable to load cookies, invalid cookies has been removed, please restart."
|
|
||||||
)
|
|
||||||
except pickle.UnpicklingError as e:
|
|
||||||
if os.path.exists("cookies.pkl"):
|
|
||||||
os.remove("cookies.pkl")
|
|
||||||
log.error(
|
|
||||||
"🔴 Error, load cookies failed, invalid cookies has been removed, please restart."
|
|
||||||
)
|
|
||||||
|
|
||||||
def _test_cookies(self):
|
def _test_cookies(self):
|
||||||
"""
|
"""
|
||||||
@@ -79,26 +62,18 @@ class APIKeyLeakageScanner:
|
|||||||
log.info("🤗 Redirecting ...")
|
log.info("🤗 Redirecting ...")
|
||||||
self.driver.get("https://github.com/")
|
self.driver.get("https://github.com/")
|
||||||
|
|
||||||
if self.driver.find_elements(
|
if self.driver.find_elements(by=By.XPATH, value="//*[contains(text(), 'Sign in')]"):
|
||||||
by=By.XPATH, value="//*[contains(text(), 'Sign in')]"
|
|
||||||
):
|
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _hit_rate_limit(self):
|
|
||||||
return self.driver.find_elements(
|
|
||||||
by=By.XPATH,
|
|
||||||
value="//*[contains(text(), 'You have exceeded a secondary rate limit')]",
|
|
||||||
)
|
|
||||||
|
|
||||||
def login_to_github(self):
|
def login_to_github(self):
|
||||||
log.info("🌍 Opening Chrome ...")
|
log.info("🌍 Opening Chrome ...")
|
||||||
|
|
||||||
self.options = webdriver.ChromeOptions()
|
options = webdriver.ChromeOptions()
|
||||||
self.options.add_argument("--ignore-certificate-errors")
|
options.add_argument("--ignore-certificate-errors")
|
||||||
self.options.add_argument("--ignore-ssl-errors")
|
options.add_argument("--ignore-ssl-errors")
|
||||||
|
|
||||||
self.driver = webdriver.Chrome(options=self.options)
|
self.driver = webdriver.Chrome(options=options)
|
||||||
self.driver.implicitly_wait(3)
|
self.driver.implicitly_wait(3)
|
||||||
|
|
||||||
cookie_exists = os.path.exists("cookies.pkl")
|
cookie_exists = os.path.exists("cookies.pkl")
|
||||||
@@ -113,8 +88,7 @@ class APIKeyLeakageScanner:
|
|||||||
self._load_cookies()
|
self._load_cookies()
|
||||||
|
|
||||||
if not self._test_cookies():
|
if not self._test_cookies():
|
||||||
if os.path.exists("cookies.pkl"):
|
os.remove("cookies.pkl") if os.path.exists("cookies.pkl") else None
|
||||||
os.remove("cookies.pkl")
|
|
||||||
log.error("🔴 Error, you are not logged in, please restart and try again.")
|
log.error("🔴 Error, you are not logged in, please restart and try again.")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
@@ -122,27 +96,20 @@ class APIKeyLeakageScanner:
|
|||||||
|
|
||||||
def _process_url(self, url: str):
|
def _process_url(self, url: str):
|
||||||
self.driver.get(url)
|
self.driver.get(url)
|
||||||
pattern = re.compile(r"sk-[a-zA-Z0-9]{48}")
|
pattern = re.compile(r"sk-proj-[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# If current webpage is reached the rate limit, then wait for 30 seconds
|
# If current webpage is reached the rate limit, then wait for 30 seconds
|
||||||
if self._hit_rate_limit():
|
if self.driver.find_elements(by=By.XPATH, value="//*[contains(text(), 'You have exceeded a secondary rate limit')]"):
|
||||||
for _ in tqdm(range(30), desc="⏳ Rate limit reached, waiting ..."):
|
for _ in tqdm(range(30), desc="⏳ Rate limit reached, waiting ..."):
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
self.driver.refresh()
|
self.driver.refresh()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Expand all the code
|
# Expand all the code
|
||||||
[
|
[element.click() for element in self.driver.find_elements(by=By.XPATH, value="//*[contains(text(), 'more match')]")]
|
||||||
element.click()
|
|
||||||
for element in self.driver.find_elements(
|
|
||||||
by=By.XPATH, value="//*[contains(text(), 'more match')]"
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
codes = self.driver.find_elements(
|
codes = self.driver.find_elements(by=By.CLASS_NAME, value="code-list") # find all elements with class name 'f4'
|
||||||
by=By.CLASS_NAME, value="code-list"
|
|
||||||
) # find all elements with class name 'f4'
|
|
||||||
for element in codes:
|
for element in codes:
|
||||||
apis = pattern.findall(element.text)
|
apis = pattern.findall(element.text)
|
||||||
if len(apis) == 0:
|
if len(apis) == 0:
|
||||||
@@ -156,20 +123,12 @@ class APIKeyLeakageScanner:
|
|||||||
for idx, result in enumerate(results):
|
for idx, result in enumerate(results):
|
||||||
db_insert(self.con, self.cur, apis[idx], result)
|
db_insert(self.con, self.cur, apis[idx], result)
|
||||||
|
|
||||||
next_buttons = self.driver.find_elements(
|
next_buttons = self.driver.find_elements(by=By.XPATH, value="//a[@aria-label='Next Page']")
|
||||||
by=By.XPATH, value="//a[@aria-label='Next Page']"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
WebDriverWait(self.driver, 5).until(
|
WebDriverWait(self.driver, 5).until(EC.presence_of_element_located((By.XPATH, "//a[@aria-label='Next Page']")))
|
||||||
EC.presence_of_element_located(
|
|
||||||
(By.XPATH, "//a[@aria-label='Next Page']")
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
next_buttons = self.driver.find_elements(
|
next_buttons = self.driver.find_elements(by=By.XPATH, value="//a[@aria-label='Next Page']")
|
||||||
by=By.XPATH, value="//a[@aria-label='Next Page']"
|
|
||||||
)
|
|
||||||
next_buttons[0].click()
|
next_buttons[0].click()
|
||||||
except Exception as _:
|
except Exception as _:
|
||||||
# log.info(" ⚪️ No more pages")
|
# log.info(" ⚪️ No more pages")
|
||||||
@@ -177,27 +136,24 @@ class APIKeyLeakageScanner:
|
|||||||
|
|
||||||
def _save_progress(self, from_iter: int):
|
def _save_progress(self, from_iter: int):
|
||||||
with open(".progress.txt", "w") as file:
|
with open(".progress.txt", "w") as file:
|
||||||
# Save the progress and timestamp
|
|
||||||
file.write(f"{from_iter}/{len(self.candidate_urls)}/{time.time()}")
|
file.write(f"{from_iter}/{len(self.candidate_urls)}/{time.time()}")
|
||||||
|
|
||||||
def _load_progress(self):
|
def load_progress(self):
|
||||||
if not os.path.exists(".progress.txt"):
|
progress_file = ".progress.txt"
|
||||||
return 0
|
if not os.path.exists(progress_file):
|
||||||
with open(".progress.txt", "r") as file:
|
|
||||||
progress = file.read().strip().split("/")
|
|
||||||
last = int(progress[0])
|
|
||||||
totl = int(progress[1])
|
|
||||||
tmst = float(progress[2])
|
|
||||||
# if the time is less than 1 hour, then continue from the last progress
|
|
||||||
if time.time() - tmst < 3600 and totl == len(self.candidate_urls):
|
|
||||||
# ask the user if they want to continue from the last progress
|
|
||||||
action = input(f"🔍 Progress found, do you want to continue from the last progress ({last}/{totl})? [yes] | no: ")
|
|
||||||
if action.lower() == "yes" or action.lower() == "y" or action == "":
|
|
||||||
return int(progress[0])
|
|
||||||
else:
|
|
||||||
return 0
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
with open(progress_file, "r") as file:
|
||||||
|
last, totl, tmst = file.read().strip().split("/")
|
||||||
|
last, totl = int(last), int(totl)
|
||||||
|
|
||||||
|
if time.time() - float(tmst) < 3600 and totl == len(self.candidate_urls):
|
||||||
|
action = input(f"🔍 Progress found, do you want to continue from the last progress ({last}/{totl})? [yes] | no: ").lower()
|
||||||
|
if action in {"yes", "y", ""}:
|
||||||
|
return last
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
def search(self, from_iter: int = None):
|
def search(self, from_iter: int = None):
|
||||||
pbar = tqdm(
|
pbar = tqdm(
|
||||||
enumerate(self.candidate_urls),
|
enumerate(self.candidate_urls),
|
||||||
@@ -206,7 +162,7 @@ class APIKeyLeakageScanner:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if from_iter is None:
|
if from_iter is None:
|
||||||
from_iter = self._load_progress()
|
from_iter = self.load_progress()
|
||||||
|
|
||||||
for idx, url in enumerate(self.candidate_urls):
|
for idx, url in enumerate(self.candidate_urls):
|
||||||
if idx < from_iter:
|
if idx < from_iter:
|
||||||
|
|||||||
@@ -14,9 +14,7 @@ def db_get_all_keys(cur: Cursor) -> list:
|
|||||||
|
|
||||||
|
|
||||||
def db_remove_duplication(con: Connection, cur: Cursor) -> None:
|
def db_remove_duplication(con: Connection, cur: Cursor) -> None:
|
||||||
cur.execute(
|
cur.execute("CREATE TABLE temp_table as SELECT apiKey, status, MAX(lastChecked) as lastChecked FROM APIKeys GROUP BY apiKey;")
|
||||||
"CREATE TABLE temp_table as SELECT apiKey, status, MAX(lastChecked) as lastChecked FROM APIKeys GROUP BY apiKey;"
|
|
||||||
)
|
|
||||||
cur.execute("DROP TABLE APIKeys;")
|
cur.execute("DROP TABLE APIKeys;")
|
||||||
cur.execute("ALTER TABLE temp_table RENAME TO APIKeys;")
|
cur.execute("ALTER TABLE temp_table RENAME TO APIKeys;")
|
||||||
con.commit()
|
con.commit()
|
||||||
|
|||||||
Reference in New Issue
Block a user