Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
- --force-single-line-imports
- --profile black
- repo: https://github.com/asottile/pyupgrade # Upgrade Python syntax
rev: v3.15.2
rev: v3.21.2
hooks:
- id: pyupgrade
args:
Expand Down
1 change: 1 addition & 0 deletions _includes/head.html
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
twitter: {{ conf.twitter | jsonify }},
mastodon: {{ conf.mastodon | jsonify }},
bluesky: {{ conf.bluesky | jsonify }},
youtube: {{ conf.youtube | jsonify }},
location: {{ conf.location | jsonify }},
extra_places: {{ conf.extra_places | jsonify }},
workshop_deadline: {{ conf.workshop_deadline | jsonify }},
Expand Down
3 changes: 3 additions & 0 deletions _includes/index_conf_title_row.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
{% elsif conf.twitter %}
<a title="Twitter" href="https://twitter.com/{{conf.twitter}}" target="_blank" rel="noopener noreferrer"><img src="/static/img/407-twitter.svg" alt="Twitter" width="14" height="14" /></a>
{% endif %}
{% if conf.bluesky %}
<a title="Bluesky" href="{{conf.bluesky}}" target="_blank" rel="noopener noreferrer"><i class="fa-brands fa-bluesky" style="width:14px;height:14px;" aria-hidden="true"></i></a>
{% endif %}
</span>
</div>
</div>
Expand Down
12 changes: 12 additions & 0 deletions _layouts/conference.html
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,18 @@ <h2 id="conf-subtitle">a.k.a. {{page.alt_name}} {{page.year}}</h2>
<a id="conf-mastodon" target="_blank" rel="noopener noreferrer" href="{{page.mastodon}}">Mastodon</a>
</div>
{% endif %}
{% if page.bluesky %}
<div>
<i class="fa-brands fa-bluesky" style="width:16px;height:16px;" aria-hidden="true"></i>
<a id="conf-bluesky" target="_blank" rel="noopener noreferrer" href="{{page.bluesky}}">Bluesky</a>
</div>
{% endif %}
{% if page.youtube %}
<div>
<i class="fa-brands fa-youtube" style="width:16px;height:16px;" aria-hidden="true"></i>
<a id="conf-youtube" target="_blank" rel="noopener noreferrer" href="{{page.youtube}}">YouTube</a>
</div>
{% endif %}
</div>
</div>
<div id="conf-deadlines" class="row">
Expand Down
12 changes: 12 additions & 0 deletions _layouts/summary.html
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,18 @@ <h1>
<a id="conf-mastodon" target="_blank" rel="noopener noreferrer" href="{{confs[0].mastodon}}">Mastodon</a>
</div>
{% endif %}
{% if confs[0].bluesky %}
<div>
<i class="fa-brands fa-bluesky" style="width:16px;height:16px;" aria-hidden="true"></i>
<a id="conf-bluesky" target="_blank" rel="noopener noreferrer" href="{{confs[0].bluesky}}">Bluesky</a>
</div>
{% endif %}
{% if confs[0].youtube %}
<div>
<i class="fa-brands fa-youtube" style="width:16px;height:16px;" aria-hidden="true"></i>
<a id="conf-youtube" target="_blank" rel="noopener noreferrer" href="{{confs[0].youtube}}">YouTube</a>
</div>
{% endif %}
</div>
</div>
<div id="all_confs">
Expand Down
99 changes: 99 additions & 0 deletions tests/test_youtube_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Tests for YouTube link extraction and Mastodon/YouTube disambiguation."""

import sys
from pathlib import Path
from unittest.mock import patch

sys.path.append(str(Path(__file__).parent.parent / "utils"))

from enrich_tba import extract_links_from_url


class TestYouTubeExtraction:
    """Test YouTube link detection in extract_links_from_url.

    Every test patches ``enrich_tba.get_all_links`` with a canned list of
    links, so ``extract_links_from_url`` classifies exactly the URLs given
    here. The URL passed to ``extract_links_from_url`` is only a placeholder
    for the page those links would have come from.
    """

    @patch("enrich_tba.get_all_links")
    def test_youtube_channel_detected(self, mock_links):
        """YouTube /@channel links are detected as youtube, not mastodon."""
        mock_links.return_value = [
            "https://www.youtube.com/@PyConUS",
        ]
        result = extract_links_from_url("https://pycon.org")
        assert "youtube" in result
        assert result["youtube"] == "https://www.youtube.com/@PyConUS"
        assert "mastodon" not in result

    @patch("enrich_tba.get_all_links")
    def test_youtube_channel_url_without_at(self, mock_links):
        """YouTube channel links without @ are detected."""
        channel_url = "https://www.youtube.com/channel/UCMjMBMGt0WP2usFilILnbcA"
        mock_links.return_value = [channel_url]
        result = extract_links_from_url("https://pycon.org")
        # Pin the stored value, not just the key: the link must be kept verbatim.
        assert result.get("youtube") == channel_url
        assert "mastodon" not in result

    @patch("enrich_tba.get_all_links")
    def test_youtube_not_mistaken_for_mastodon(self, mock_links):
        """YouTube /@username must not end up in mastodon field."""
        mock_links.return_value = [
            "https://www.youtube.com/@EuroPython",
            "https://fosstodon.org/@europython",
        ]
        result = extract_links_from_url("https://europython.eu")
        assert result.get("youtube") == "https://www.youtube.com/@EuroPython"
        assert result.get("mastodon") == "https://fosstodon.org/@europython"

    @patch("enrich_tba.get_all_links")
    def test_youtu_be_short_link(self, mock_links):
        """Short youtu.be links are detected as youtube."""
        mock_links.return_value = [
            "https://youtu.be/abc123",
        ]
        result = extract_links_from_url("https://pycon.org")
        assert result.get("youtube") == "https://youtu.be/abc123"
        assert "mastodon" not in result

    @patch("enrich_tba.get_all_links")
    def test_mastodon_still_works(self, mock_links):
        """Mastodon links on known instances still detected correctly."""
        mock_links.return_value = [
            "https://fosstodon.org/@pycon",
        ]
        result = extract_links_from_url("https://pycon.org")
        assert "mastodon" in result
        assert result["mastodon"] == "https://fosstodon.org/@pycon"
        assert "youtube" not in result

    @patch("enrich_tba.get_all_links")
    def test_generic_mastodon_still_works(self, mock_links):
        """Generic /@username on unknown instances still detected as mastodon."""
        mock_links.return_value = [
            "https://social.example.org/@pyconf",
        ]
        result = extract_links_from_url("https://pyconf.org")
        assert "mastodon" in result
        assert "youtube" not in result

    @patch("enrich_tba.get_all_links")
    def test_youtube_first_seen_wins(self, mock_links):
        """Only the first YouTube link is kept; later ones are dropped entirely."""
        mock_links.return_value = [
            "https://www.youtube.com/@PyConUS",
            "https://www.youtube.com/@AnotherChannel",
        ]
        result = extract_links_from_url("https://pycon.org")
        assert result["youtube"] == "https://www.youtube.com/@PyConUS"
        # The second link also contains "/@"; once the youtube slot is taken it
        # must not fall through into the mastodon field.
        assert "mastodon" not in result

    @patch("enrich_tba.get_all_links")
    def test_all_social_links_extracted(self, mock_links):
        """YouTube, Mastodon, and Bluesky can all be extracted together."""
        mock_links.return_value = [
            "https://bsky.app/profile/pycon.org",
            "https://www.youtube.com/@PyConUS",
            "https://fosstodon.org/@pycon",
        ]
        result = extract_links_from_url("https://pycon.org")
        # Each link must land in its own field, not merely be present somewhere.
        assert result.get("bluesky") == "https://bsky.app/profile/pycon.org"
        assert result.get("youtube") == "https://www.youtube.com/@PyConUS"
        assert result.get("mastodon") == "https://fosstodon.org/@pycon"
27 changes: 20 additions & 7 deletions utils/enrich_tba.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
MAX_CONTENT_LENGTH = 15000 # Max characters per conference website

# Field type categorization for validation
URL_FIELDS = {"sponsor", "finaid", "mastodon", "bluesky", "cfp_link"}
URL_FIELDS = {"sponsor", "finaid", "mastodon", "bluesky", "youtube", "cfp_link"}
DATE_FIELDS = {"cfp", "workshop_deadline", "tutorial_deadline"}
TIMEZONE_FIELD = "timezone"

Expand Down Expand Up @@ -267,6 +267,11 @@ def get_all_links(url: str) -> list[str]:
return []


def _domain_matches(domain: str, hosts: tuple[str, ...]) -> bool:
"""Return True if domain equals one of hosts or is a subdomain of one."""
return any(domain == h or domain.endswith(f".{h}") for h in hosts)


# Known Mastodon instances (common ones in tech/Python community)
MASTODON_INSTANCES = {
"mastodon.social",
Expand Down Expand Up @@ -334,22 +339,30 @@ def extract_links_from_url(url: str) -> dict[str, str]:
for link in links:
link_lower = link.lower()
parsed_link = urlparse(link)
link_domain = parsed_link.netloc.lower()

is_youtube = _domain_matches(link_domain, ("youtube.com", "youtu.be"))
is_twitter = _domain_matches(link_domain, ("twitter.com", "x.com"))

# Bluesky - always bsky.app/profile/
if "bluesky" not in seen_types and "bsky.app/profile/" in link_lower:
found["bluesky"] = link
seen_types.add("bluesky")
logger.debug(f" Found bluesky: {link}")

# YouTube - youtube.com/@channel or youtu.be links
elif "youtube" not in seen_types and is_youtube:
found["youtube"] = link
seen_types.add("youtube")
logger.debug(f" Found youtube: {link}")

# Mastodon - /@username pattern on known instances or any instance
# Exclude Twitter/X which don't use /@, but guard against edge cases
# Exclude Twitter/X and YouTube which also use /@username patterns
elif "mastodon" not in seen_types and "/@" in link:
domain = parsed_link.netloc.lower()

# Skip Twitter/X domains (exact host or subdomains only)
if domain == "twitter.com" or domain.endswith((".x.com", ".twitter.com")) or domain == "x.com":
# Skip Twitter/X and YouTube domains
if is_twitter or is_youtube:
pass
elif domain in MASTODON_INSTANCES or "mastodon" in domain or "toot" in domain:
elif link_domain in MASTODON_INSTANCES or "mastodon" in link_domain or "toot" in link_domain:
found["mastodon"] = link
seen_types.add("mastodon")
logger.debug(f" Found mastodon: {link}")
Expand Down
1 change: 1 addition & 0 deletions utils/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
twitter: BestConfEver # Twitter handle of conference (Optional)
mastodon: https://mastodon.social/@bconf # Mastodon handle of conference (Optional)
bluesky: https://bsky.app/profile/bconf.bsky.social # Bluesky profile URL of conference (Optional)
youtube: https://www.youtube.com/@bconf # YouTube channel of conference (Optional)
sub: PY # Type of conference (see or add _data/types.yml)
note: Important # In case there are extra notes about the conference (Optional)
location: # Geolocation for inclusion in map
Expand Down
3 changes: 2 additions & 1 deletion utils/tidy_conf/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class Conference(BaseModel):
twitter: str | None = None
mastodon: HttpUrl | None = None
bluesky: str | None = None
youtube: HttpUrl | None = None
sub: str
note: str | None = None
location: list[Location] | None = None
Expand Down Expand Up @@ -121,7 +122,7 @@ def validate_title(cls, v):
return re.sub(r"\b(19|20)\d{2}\b", "", v).strip()
return v

@field_serializer("link", "cfp_link", "sponsor", "finaid", "mastodon")
@field_serializer("link", "cfp_link", "sponsor", "finaid", "mastodon", "youtube")
def ser_url(self, value):
    """Serialize a URL field to a plain string, passing ``None`` through.

    This serializer is registered for fields that are optional (for example
    ``mastodon`` and ``youtube`` are declared ``HttpUrl | None``). Calling
    ``str()`` on an unset field would emit the literal text ``"None"``, so a
    missing value is returned unchanged instead.

    Args:
        value: The field value, a URL object or ``None``.

    Returns:
        ``str(value)`` for a set field, ``None`` for an unset one.
    """
    if value is None:
        return None
    return str(value)

Expand Down
1 change: 1 addition & 0 deletions utils/tidy_conf/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"twitter",
"mastodon",
"bluesky",
"youtube",
"location",
"extra_places",
]
Expand Down
Loading