mirror of
https://github.com/aaif-goose/goose.git
synced 2026-06-02 06:14:27 +02:00
Quick fix for community all stars script (#5798)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -7,14 +7,20 @@ This script analyzes GitHub contributor statistics and generates rankings for:
|
||||
- Top 5 Team Stars (Block employees, non-goose team)
|
||||
- Monthly Leaderboard (all eligible contributors)
|
||||
|
||||
The script automatically:
|
||||
- Fetches contributor data from GitHub API (with retry logic)
|
||||
- Checks public org memberships to detect Block employees
|
||||
- Categorizes contributors as Block or External
|
||||
- Caches data locally for faster subsequent runs
|
||||
|
||||
Usage:
|
||||
python3 community_stars.py "November 2025"
|
||||
python3 community_stars.py "November 1, 2025 - November 17, 2025"
|
||||
python3 community_stars.py "2025-11-01 - 2025-11-17"
|
||||
|
||||
Requirements:
|
||||
- GitHub contributor data at /tmp/github_contributors.json
|
||||
- Team list file (local or from GitHub)
|
||||
- Internet connection (to fetch GitHub data)
|
||||
- Team list file at documentation/scripts/community_stars_teams.txt
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -24,11 +30,52 @@ import urllib.request
|
||||
from datetime import datetime
|
||||
import calendar
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
# GitHub URL for team list file
|
||||
TEAMS_FILE_URL = "https://raw.githubusercontent.com/block/goose/main/documentation/scripts/community_stars_teams.txt"
|
||||
LOCAL_TEAMS_FILE = Path(__file__).parent / "community_stars_teams.txt"
|
||||
|
||||
# Block-related organizations to check
|
||||
# GitHub organization logins treated as Block-affiliated.
# is_block_employee() lowercases each org login before intersecting it with
# this set, so every entry here must be lowercase.
BLOCK_ORGS = {'square', 'block', 'squareup', 'block-ghc', 'cashapp'}
|
||||
|
||||
def is_block_employee(username, timeout=30):
    """Return True if a GitHub user appears to be a Block employee.

    Fetches the user's public profile and inspects its ``company`` field.
    Only when that field does not match is a second request made to the
    user's public org memberships, which are compared against BLOCK_ORGS.

    Args:
        username: GitHub login to check.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        True when the company field or a public org membership matches a
        Block-related name. False otherwise, including when the lookup
        fails (rate limit, network error, unexpected payload); in that case
        a warning is written to stderr and the user is treated as external.
    """
    # Imported locally so this function does not depend on which modules the
    # top of the file happens to import.
    import re
    import sys
    from urllib.parse import quote

    # Keywords must match as whole words: a plain substring test would
    # classify "Blockchain", "Blockstream" or "Squarespace" as Block.
    # "squareup" is listed explicitly because it no longer matches via the
    # "square" substring.
    block_company_pattern = re.compile(
        r"(?<![a-z0-9])(?:block|square|squareup|cash ?app|tidal)(?![a-z0-9])"
    )

    # The login is interpolated into the URL path; quote it so characters
    # such as the brackets in "dependabot[bot]" cannot produce a bad URL.
    user_url = f"https://api.github.com/users/{quote(str(username), safe='')}"

    try:
        # First request: the profile, which carries the company field.
        with urllib.request.urlopen(user_url, timeout=timeout) as response:
            user_data = json.loads(response.read().decode('utf-8'))

        # The API returns null for an unset company, hence the `or ''`.
        company = (user_data.get('company') or '').lower()
        if block_company_pattern.search(company):
            return True

        # Second request, only when the company field did not match.
        with urllib.request.urlopen(f"{user_url}/orgs", timeout=timeout) as response:
            orgs = json.loads(response.read().decode('utf-8'))

        # Case-insensitive comparison against the known Block orgs.
        user_orgs = {org['login'].lower() for org in orgs}
        return bool(user_orgs & BLOCK_ORGS)

    except Exception as exc:
        # Best effort: a failed lookup must not abort the whole report, so
        # the user defaults to external. Report it, because a silent
        # failure (e.g. hitting the API rate limit) misclassifies people
        # without any visible sign.
        print(
            f"  Warning: could not check @{username} ({exc}); treating as external",
            file=sys.stderr,
        )
        return False
|
||||
|
||||
def load_team_lists():
|
||||
"""Load and parse team lists from file (local or GitHub)."""
|
||||
content = None
|
||||
@@ -51,7 +98,6 @@ def load_team_lists():
|
||||
goose_maintainers = set()
|
||||
block_non_goose = set()
|
||||
external_goose = set()
|
||||
external = set()
|
||||
bots = set()
|
||||
|
||||
current_section = None
|
||||
@@ -67,15 +113,11 @@ def load_team_lists():
|
||||
current_section = 'block_non_goose'
|
||||
elif '# External, goose' in line:
|
||||
current_section = 'external_goose'
|
||||
elif line.startswith('# External') and 'goose' not in line.lower():
|
||||
current_section = 'external'
|
||||
elif '# Bots' in line:
|
||||
current_section = 'bots'
|
||||
continue
|
||||
|
||||
# Add username to appropriate set (lowercase for case-insensitive matching)
|
||||
# Apply .lower() to entire username including brackets (e.g., "dependabot[bot]")
|
||||
# This matches the pattern used above: 'goose' not in line.lower()
|
||||
username = line.lower()
|
||||
if current_section == 'goose_maintainers':
|
||||
goose_maintainers.add(username)
|
||||
@@ -83,12 +125,10 @@ def load_team_lists():
|
||||
block_non_goose.add(username)
|
||||
elif current_section == 'external_goose':
|
||||
external_goose.add(username)
|
||||
elif current_section == 'external':
|
||||
external.add(username)
|
||||
elif current_section == 'bots':
|
||||
bots.add(username)
|
||||
|
||||
return goose_maintainers, block_non_goose, external_goose, external, bots
|
||||
return goose_maintainers, block_non_goose, external_goose, bots
|
||||
|
||||
def parse_date_range(date_input):
|
||||
"""Parse various date input formats and return start/end timestamps."""
|
||||
@@ -148,25 +188,69 @@ def main():
|
||||
sys.exit(1)
|
||||
|
||||
# Load team lists
|
||||
goose_maintainers, block_non_goose, external_goose, external, bots = load_team_lists()
|
||||
goose_maintainers, block_non_goose, external_goose, bots = load_team_lists()
|
||||
|
||||
# Load GitHub data
|
||||
github_data_file = '/tmp/github_contributors.json'
|
||||
contributors_data = None
|
||||
|
||||
# Try to load existing file first
|
||||
try:
|
||||
with open(github_data_file, 'r') as f:
|
||||
contributors_data = json.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: GitHub contributor data not found at {github_data_file}")
|
||||
print("Please run: curl -s -H 'Accept: application/vnd.github.v3+json' 'https://api.github.com/repos/block/goose/stats/contributors' > /tmp/github_contributors.json")
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {github_data_file}")
|
||||
print(f"Details: {e}")
|
||||
print("The GitHub API may have returned an error. Try fetching the data again.")
|
||||
sys.exit(1)
|
||||
|
||||
# Validate the data is not empty or invalid
|
||||
if not contributors_data or not isinstance(contributors_data, list) or len(contributors_data) == 0:
|
||||
print(f"Warning: GitHub data file exists but is empty or invalid. Fetching fresh data...", file=sys.stderr)
|
||||
contributors_data = None
|
||||
except (FileNotFoundError, json.JSONDecodeError):
|
||||
print(f"GitHub data file not found or invalid. Fetching fresh data...", file=sys.stderr)
|
||||
contributors_data = None
|
||||
|
||||
# Fetch from GitHub API if needed
|
||||
if contributors_data is None:
|
||||
print("Fetching contributor data from GitHub API...", file=sys.stderr)
|
||||
max_retries = 3
|
||||
retry_delay = 2
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
url = "https://api.github.com/repos/block/goose/stats/contributors"
|
||||
with urllib.request.urlopen(url, timeout=30) as response:
|
||||
contributors_data = json.loads(response.read().decode('utf-8'))
|
||||
|
||||
# Validate the response
|
||||
if contributors_data and isinstance(contributors_data, list) and len(contributors_data) > 0:
|
||||
# Save to file for future use
|
||||
with open(github_data_file, 'w') as f:
|
||||
json.dump(contributors_data, f)
|
||||
print(f"✓ Successfully fetched data for {len(contributors_data)} contributors", file=sys.stderr)
|
||||
break
|
||||
else:
|
||||
print(f"Attempt {attempt + 1}/{max_retries}: GitHub API returned empty data. Retrying...", file=sys.stderr)
|
||||
contributors_data = None
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(retry_delay)
|
||||
except Exception as e:
|
||||
print(f"Attempt {attempt + 1}/{max_retries}: Error fetching from GitHub API: {e}", file=sys.stderr)
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(retry_delay)
|
||||
else:
|
||||
print("\nError: Could not fetch GitHub contributor data after multiple attempts.")
|
||||
print("The GitHub stats API may be temporarily unavailable or still computing statistics.")
|
||||
print("Please try again in a few minutes.")
|
||||
sys.exit(1)
|
||||
|
||||
if contributors_data is None:
|
||||
print("\nError: GitHub API returned empty data after multiple attempts.")
|
||||
print("The repository statistics may still be computing. Please try again in a few minutes.")
|
||||
sys.exit(1)
|
||||
|
||||
# Process contributors
|
||||
contributor_stats = []
|
||||
checked_orgs = {} # Cache org checks to avoid redundant API calls
|
||||
|
||||
print("Checking contributor organizations...", file=sys.stderr)
|
||||
|
||||
for contributor in contributors_data:
|
||||
# Skip if author is None (deleted users)
|
||||
@@ -199,10 +283,18 @@ def main():
|
||||
# Categorize (only Block non-goose and External now)
|
||||
if username_lower in block_non_goose:
|
||||
category = 'block_non_goose'
|
||||
elif username_lower in external:
|
||||
category = 'external'
|
||||
else:
|
||||
category = 'unknown'
|
||||
# Check if user is in a Block org (with caching)
|
||||
if username not in checked_orgs:
|
||||
checked_orgs[username] = is_block_employee(username)
|
||||
# Add a small delay to avoid rate limiting
|
||||
time.sleep(0.1)
|
||||
|
||||
if checked_orgs[username]:
|
||||
category = 'block_non_goose'
|
||||
print(f" ✓ Detected Block employee: @{username}", file=sys.stderr)
|
||||
else:
|
||||
category = 'external'
|
||||
|
||||
contributor_stats.append({
|
||||
'username': username,
|
||||
@@ -220,7 +312,6 @@ def main():
|
||||
# Separate by category
|
||||
block_list = [c for c in contributor_stats if c['category'] == 'block_non_goose']
|
||||
external_list = [c for c in contributor_stats if c['category'] == 'external']
|
||||
unknown_list = [c for c in contributor_stats if c['category'] == 'unknown']
|
||||
|
||||
# Get top 5 from each
|
||||
top_external = external_list[:5]
|
||||
@@ -255,25 +346,16 @@ def main():
|
||||
print("-" * 70)
|
||||
if contributor_stats:
|
||||
for i, contrib in enumerate(contributor_stats, 1):
|
||||
cat_label = "External" if contrib['category'] == 'external' else "Block" if contrib['category'] == 'block_non_goose' else "Unknown"
|
||||
cat_label = "External" if contrib['category'] == 'external' else "Block"
|
||||
print(f"{i:2d}. @{contrib['username']:20s} - {contrib['commits']:3d} commits, {contrib['total_lines']:6,d} lines [{cat_label}]")
|
||||
else:
|
||||
print("No contributors found for this period.")
|
||||
|
||||
if unknown_list:
|
||||
print()
|
||||
print("⚠️ UNKNOWN CONTRIBUTORS (not in team lists):")
|
||||
print("-" * 70)
|
||||
for contrib in unknown_list:
|
||||
print(f" @{contrib['username']:20s} - {contrib['commits']:3d} commits, {contrib['total_lines']:6,d} lines")
|
||||
|
||||
print()
|
||||
print("=" * 70)
|
||||
print(f"Total contributors (excluding bots, goose maintainers, external goose): {len(contributor_stats)}")
|
||||
print(f" External: {len(external_list)}")
|
||||
print(f" Block (non-goose): {len(block_list)}")
|
||||
if unknown_list:
|
||||
print(f" Unknown: {len(unknown_list)}")
|
||||
print("=" * 70)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
# Community Stars Team Lists
|
||||
# This file categorizes contributors for the block/goose Community Stars program
|
||||
# Format: One username per line under each category header
|
||||
#
|
||||
# Note: External contributors (eligible for Community All-Stars) are automatically
|
||||
# detected - they don't need to be listed here. Anyone not in the lists below
|
||||
# defaults to "external" unless they have public Block org membership.
|
||||
|
||||
# Goose Maintainers (excluded from rankings)
|
||||
angiejones
|
||||
@@ -65,46 +69,6 @@ chaitanyarahalkar
|
||||
The-Best-Codes
|
||||
Abhijay007
|
||||
|
||||
# External (eligible for Community All-Stars)
|
||||
ARYPROGRAMMER
|
||||
dbraduan
|
||||
codefromthecrypt
|
||||
Better-Boy
|
||||
GaryZhous
|
||||
iandouglas
|
||||
lamchau
|
||||
laanak08
|
||||
Lymah123
|
||||
the-matrixneo
|
||||
arielherself
|
||||
Developerayo
|
||||
SalvatoreT
|
||||
sheikhlimon
|
||||
cgwalters
|
||||
Anudhyan
|
||||
johnlanda
|
||||
alexyao2015
|
||||
aegntic
|
||||
bwalding
|
||||
ajgray-stripe
|
||||
sfc-gh-twhite
|
||||
adhintz
|
||||
sana-db
|
||||
toyamagu-2021
|
||||
Shreyanshsingh23
|
||||
Jay4242
|
||||
jalateras
|
||||
sings-to-bees-on-wednesdays
|
||||
myaple
|
||||
necaris
|
||||
par5ul1
|
||||
rockwotj
|
||||
ki3ani
|
||||
vlascik
|
||||
eyelight
|
||||
nick-w-nick
|
||||
ayax79
|
||||
|
||||
# Bots (excluded from rankings)
|
||||
dependabot[bot]
|
||||
SquareGist
|
||||
|
||||
Reference in New Issue
Block a user