aseseri · aseseri · Jun 13, 2025 · Jun 13, 2025 · Jun 13, 2025 · Jun 13, 2025
diff --git a/.gitignore b/.gitignore
@@ -20,3 +20,6 @@ backend/tests.http
 
 # ML Models & Artifacts
 *.joblib
+
+# Environment Variables
+.env
diff --git a/Makefile b/Makefile
@@ -37,14 +37,14 @@ logs: ## Follow logs for all services
 # ==============================================================================
 # 			DEVELOPMENT & TESTING
 # ==============================================================================
-setup: up seed-db build-model ## Run this once to setup a new environment
-	@echo "$(GREEN)✅ Initial setup complete!$(RESET)"
+setup: up seed-db build-model ## Run this once to set up a new LOCAL environment from scratch
+	@echo "$(GREEN)✅ Initial local setup complete! Database is seeded and model is built.$(RESET)"
 
-seed-db: ## Run the database seed script (requires services to be up)
-	@echo "$(YELLOW)--> Seeding database...$(RESET)"
+seed-db: ## Run the database seed script on the LOCAL docker DB (requires services to be up)
+	@echo "$(YELLOW)--> Seeding LOCAL database...$(RESET)"
 	@docker compose exec backend python seed_database.py
 
-build-model: ## Run the ML model training script (requires services to be up)
+build-model: ## Run the ML model training script inside the running backend container
 	@echo "$(YELLOW)--> Building similarity model...$(RESET)"
 	@docker compose exec backend python build_similarity_model.py
 
@@ -57,4 +57,26 @@ test-backend: ## Run backend python tests
 
 test-frontend: ## Run frontend javascript tests
 	@echo "$(GREEN)--> Running frontend tests...$(RESET)"
-	@docker compose run --rm frontend npm test -- --watchAll=false
+	@docker compose run --rm frontend npm test -- --watchAll=false
+
+test-ci: ## Run backend tests in a fresh python:3.11-slim container (approximates CI)
+	@echo "$(YELLOW)--> Running tests in a clean CI-like environment...$(RESET)"
+	@docker run --rm \
+		-v "$(CURDIR)/backend":/app \
+		-w /app \
+		python:3.11-slim \
+		sh -c "pip install -r requirements.txt && pip install pytest && pytest"
+
+# ==============================================================================
+# 			PRODUCTION BUILDS
+# ==============================================================================
+build-frontend-prod: ## Submit a Cloud Build for the production frontend image (requires gcloud)
+	@echo "$(YELLOW)--> Building production frontend image...$(RESET)"
+	@gcloud builds submit --config frontend/cloudbuild.yaml \
+	  --substitutions=_API_BASE_URL=https://wnba-backend-service-776933261932.us-west1.run.app \
+	  ./frontend
+
+# Backend counterpart: Cloud Build builds ./backend and tags the image in gcr.io/wnba-analytics-prod
+build-backend-prod: ## Build the production backend image for deployment
+	@echo "$(YELLOW)--> Building production backend image...$(RESET)"
+	@gcloud builds submit ./backend --tag gcr.io/wnba-analytics-prod/wnba-backend
diff --git a/backend/Dockerfile b/backend/Dockerfile
@@ -1,15 +1,21 @@
 # backend/Dockerfile
-FROM python:3.11-slim
 
+# --- Stage 1: "base" - installs Python dependencies; both later stages start FROM it ---
+FROM python:3.11-slim AS base
 WORKDIR /app
-
 COPY requirements.txt .
-
 RUN pip install --no-cache-dir -r requirements.txt
 
+# --- Stage 2: "development" - copies the source and serves it with uvicorn --reload on port 8000 ---
+FROM base AS development
+WORKDIR /app
 COPY . .
-
-# Expose the port the app runs on
-EXPOSE 8000
-
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
+
+# --- Stage 3: "production" - builds the model into the image; `exec` lets uvicorn replace the shell so it receives stop signals ---
+FROM base AS production
+WORKDIR /app
+COPY . .
+RUN python build_similarity_model.py
+EXPOSE 8080
+CMD exec uvicorn main:app --host 0.0.0.0 --port ${PORT:-8080}
diff --git a/backend/build_similarity_model.py b/backend/build_similarity_model.py
@@ -1,56 +1,56 @@
 # backend/build_similarity_model.py
+
 import logging
 import pandas as pd
 import joblib
-from sqlalchemy import text
+import json
+
 from sklearn.preprocessing import StandardScaler
 from sklearn.metrics.pairwise import cosine_similarity
 
-from database import SessionLocal, engine
-
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# The JSON files are now the source of truth for the build
+DATA_FILES = [
+    'data/wnba_combined_2024.json',
+]
+
 def build_model():
-    logger.info("Connecting to database to fetch player stats...")
-    db = SessionLocal()
-
-    # Load all player stats into a Pandas DataFrame
-    # SQL query to join players and stats
-    query = """
-    SELECT
-        p.id as player_id,
-        p.first_name,
-        p.last_name,
-        ps.season,
-        ps.points_per_game,
-        ps.rebounds_per_game,
-        ps.assists_per_game,
-        ps.steals_per_game,
-        ps.blocks_per_game,
-        ps.field_goal_percentage,
-        ps.three_point_percentage,
-        ps.player_efficiency_rating
-    FROM players p
-    JOIN player_stats ps ON p.id = ps.player_id
-    """
-    df = pd.read_sql(text(query), con=engine.connect())
-    db.close()
-    logger.info(f"Successfully loaded {len(df)} player seasons from the database.")
-
-    if df.empty:
-        logger.error("No data found in the database. Please run the seed script first.")
-        return
-
-    # Select features used for comparison
+    logger.info("Loading player data from JSON files...")
+
+    all_seasons_df = []
+    for file_name in DATA_FILES:
+        # Season = text between the last '_' and the first '.' after it ('..._2024.json' -> '2024')
+        year = file_name.rsplit('_', 1)[-1].split('.')[0]
+        # Explicit UTF-8 so decoding does not depend on the platform's default encoding
+        with open(file_name, 'r', encoding='utf-8') as f:
+            season_data = json.load(f)
+        # The file is closed by now; tag every record with its season
+        for record in season_data:
+            record['season'] = year
+        all_seasons_df.append(pd.DataFrame(season_data))
+
+    df = pd.concat(all_seasons_df, ignore_index=True)
+
+    # Strip the '*' marker from player names, then drop 'TOT' rows and rows without a name
+    df['Player'] = df['Player'].str.replace('*', '', regex=False)
+    df = df[df['Team'] != 'TOT'] # Exclude total rows. NOTE(review): any per-team rows left for a multi-team player would share one player_season_id - confirm intended
+    df = df.dropna(subset=['Player']) # Drop rows with no player name
+
+    logger.info(f"Successfully loaded {len(df)} total player seasons.")
+
+    # Define the unique ID and the features for the model
+    df['player_season_id'] = df['Player'] + ' (' + df['season'] + ')'
     features = [
-        'points_per_game', 'rebounds_per_game', 'assists_per_game',
-        'steals_per_game', 'blocks_per_game', 'field_goal_percentage',
-        'three_point_percentage', 'player_efficiency_rating'
+        'PTS', 'TRB', 'AST', 'STL', 'BLK', 'FG%', '3P%', 'PER', 'WS'
     ]
+    # Ensure all feature columns exist and fill NaNs with 0
+    for feature in features:
+        if feature not in df.columns:
+            df[feature] = 0
+    df[features] = df[features].fillna(0)
 
-    # Unique identifier for each player-season
-    df['player_season_id'] = df['first_name'] + ' ' + df['last_name'] + ' (' + df['season'] + ')'
     df_features = df.set_index('player_season_id')[features]
 
     # Normalize the data
@@ -59,8 +59,6 @@ def build_model():
     logger.info("Features have been scaled.")
 
     # Calculate Cosine Similarity
-    # This creates a big matrix where every player-season is compared to every other one.
-    # The result is a score from 0 (completely different) to 1 (identical).
     similarity_matrix = cosine_similarity(scaled_features)
     logger.info("Similarity matrix has been calculated.")
 
@@ -69,9 +67,6 @@ def build_model():
     joblib.dump(similarity_matrix, 'similarity_matrix.joblib')
 
     logger.info("Model artifacts have been saved successfully!")
-    logger.info("-> similarity_data.joblib (Player data and vectors)")
-    logger.info("-> similarity_matrix.joblib (Comparison scores)")
-
 
 if __name__ == "__main__":
     build_model()
diff --git a/backend/database.py b/backend/database.py
@@ -3,20 +3,38 @@
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker, declarative_base
 
-# --- Check for a test-specific database URL first ---
-# The getenv() function reads an environment variable.
-# We provide a default value for local development.
-DATABASE_URL = os.getenv(
-    "DATABASE_URL", 
-    "postgresql://admin:password123@db/wnba_db"
-)
+def get_database_url():
+    """
+    Return the SQLAlchemy database URL from the first source that applies:
+    1. DATABASE_URL, if set and non-empty (used for testing).
+    2. A Cloud SQL Unix-socket URL when K_SERVICE is set (Cloud Run production);
+       raises KeyError if DB_USER, DB_PASS, DB_NAME or INSTANCE_CONNECTION_NAME is unset.
+    3. The local Docker PostgreSQL database (for development).
+    """
+    from urllib.parse import quote  # local import keeps this function self-contained
+
+    # 1. Highest priority: an explicit URL wins over everything else.
+    if db_url := os.getenv("DATABASE_URL"):
+        return db_url
+
+    # 2. Cloud Run: credentials are percent-encoded so '@', ':' or '/' cannot corrupt the URL.
+    if os.getenv("K_SERVICE"):
+        db_user = quote(os.environ["DB_USER"], safe="")
+        db_pass = quote(os.environ["DB_PASS"], safe="")
+        db_name = os.environ["DB_NAME"]
+        instance_connection_name = os.environ["INSTANCE_CONNECTION_NAME"]
+        return (
+            f"postgresql+psycopg2://{db_user}:{db_pass}@/{db_name}"
+            f"?host=/cloudsql/{instance_connection_name}"
+        )
+
+    # 3. Default: Fall back to the local Docker database URL.
+    return "postgresql://admin:password123@db:5432/wnba_db"
+
+DATABASE_URL = get_database_url()
 
 # --- Use the DATABASE_URL variable ---
 engine = create_engine(DATABASE_URL)
 
-# For SQLite, we need a special argument. We'll add it only if needed.
-if "sqlite" in DATABASE_URL:
-    engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
-
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 Base = declarative_base()
diff --git a/backend/main.py b/backend/main.py
@@ -42,7 +42,8 @@ async def lifespan(app: FastAPI):
 
 app = FastAPI(lifespan=lifespan)
 
-origins = ["http://localhost:3000"]
+origins = ["http://localhost:3000",
+           "https://wnba-frontend-service-776933261932.us-west1.run.app"]
 app.add_middleware(
     CORSMiddleware,
     allow_origins=origins,

diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
@@ -1,28 +1,39 @@
 # backend/tests/conftest.py
 import pytest
 import os
-
-# This tells our app to use the SQLite DB for all tests.
-os.environ['DATABASE_URL'] = "sqlite:///./test.db"
-
-from fastapi.testclient import TestClient
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 
+# Set the environment variable BEFORE other imports.
+os.environ['DATABASE_URL'] = "sqlite:///./test.db"
+
 from main import app, get_db
-from database import Base, engine # We can now import the engine safely
+from database import Base, engine
 
 TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 
-# Override the get_db dependency to use the test database
-app.dependency_overrides[get_db] = lambda: TestingSessionLocal()
-
 @pytest.fixture(scope="function")
 def db_session():
+    """
+    This fixture creates a clean database with all tables for a single test function,
+    and then drops all tables after the test is done.
+    """
     Base.metadata.create_all(bind=engine)
-    yield TestingSessionLocal()
-    Base.metadata.drop_all(bind=engine)
 
-@pytest.fixture(scope="function")
-def test_client(db_session):
-    yield TestClient(app)
+    # This is the key: we override the app's dependency to use our
+    # clean, temporary database for the duration of the test.
+    def override_get_db():
+        db = TestingSessionLocal()  # created before `try` so `finally` never sees an unbound `db`
+        try:
+            yield db  # hand the session to the request handler
+        finally:
+            db.close()  # always release the session, even if the handler raised
+
+    app.dependency_overrides[get_db] = override_get_db
+
+    # Yield nothing, just perform setup and teardown
+    yield
+
+    # Teardown: clean up the override and drop tables
+    del app.dependency_overrides[get_db]
+    Base.metadata.drop_all(bind=engine)
diff --git a/backend/tests/test_main.py b/backend/tests/test_main.py
@@ -1,10 +1,11 @@
 # backend/tests/test_main.py
+from fastapi.testclient import TestClient
+from main import app
 
-def test_read_root(test_client):
-    """
-    Tests if the root API endpoint ('/api') returns a successful response
-    and the expected JSON message.
-    """
-    response = test_client.get("/api")
+# The root endpoint check needs no database, so this test requests no fixture.
+def test_read_root():
+    # Build a client directly against the app rather than taking one from a fixture.
+    client = TestClient(app)
+    response = client.get("/api")
     assert response.status_code == 200
     assert response.json() == {"message": "WNBA Analytics API is running!"}