diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index 42f6a5386..774cae65c 100644 --- a/.github/workflows/pre-release.yml +++ b/.github/workflows/pre-release.yml @@ -46,6 +46,15 @@ with: token: ${{ secrets.QLTY_COVERAGE_TOKEN }} files: coverage.xml + # Check translation tags + - name: Check translation tags + id: translation_check + run: | + pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..." + # Read the report content and save it as an output + echo "TRANSLATION_REPORT<<EOF" >> $GITHUB_ENV + cat translation_check_report.md >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV - name: Generate new output files run: | # @@ -112,12 +121,28 @@ cp output/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml output/cornucopia_webapp/ zip -r output/owasp_cornucopia_webapp_3.0_en.zip output/cornucopia_webapp/Links/* output/cornucopia_webapp/Fonts/* output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml ./resources/templates/owasp_cornucopia_webapp_scoresheet.pdf + - name: Prepare release body with translation report + id: prepare_release + run: | + # Read the translation report + TRANSLATION_REPORT=$(cat translation_check_report.md) + # Create a combined release body + cat > release_body.md << 'EOF' + ## OWASP 
Cornucopia Pre-Release + + This is an automated pre-release build from the latest master branch. + + --- + + EOF + cat translation_check_report.md >> release_body.md - uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2.5.0 name: "Create pre-release" with: tag_name: pre-release prerelease: true name: Latest pre-release + body_path: release_body.md files: | CHANGELOG.md LICENSE.md diff --git a/.github/workflows/run-tests-generate-output.yaml b/.github/workflows/run-tests-generate-output.yaml index dd19580a9..374a6f6bf 100644 --- a/.github/workflows/run-tests-generate-output.yaml +++ b/.github/workflows/run-tests-generate-output.yaml @@ -45,6 +45,16 @@ jobs: run: | pip install -r requirements.txt --require-hashes pipenv install -d + - name: Check translation tags + run: | + pipenv run python scripts/check_translations.py || echo "Translation issues found, continuing..." + - name: Upload translation check report + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + retention-days: 5 + name: translation-check-report.${{ github.sha }}.md + path: translation_check_report.md - name: Generate new output files run: | # @@ -134,23 +144,44 @@ jobs: issues: write needs: uploadoutputfiles steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + ref: ${{ github.event.pull_request.head.ref }} + - name: Download translation check report + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: translation-check-report.${{ github.sha }}.md + path: . 
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 env: PR_NUMBER: ${{ github.event.number }} - PR_NOTES: | - [badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge + ARTIFACT_URL: ${{needs.uploadoutputfiles.outputs.artifact-url}} + with: + script: | + const fs = require('fs'); + let translationReport = ''; + try { + translationReport = fs.readFileSync('translation_check_report.md', 'utf8'); + } catch (error) { + translationReport = 'Translation check report not found.'; + } + + const prNotes = `[badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge ## Build artifacts: | Name | Link | |------|------| - | Output files | [cornucopia-build-files.${{ github.sha }}.zip](${{needs.uploadoutputfiles.outputs.artifact-url}}) | + | Output files | [cornucopia-build-files.${{ github.sha }}.zip](${process.env.ARTIFACT_URL}) | + + --- + + ${translationReport}`; - with: - script: | github.rest.issues.createComment({ issue_number: process.env.PR_NUMBER, owner: context.repo.owner, repo: context.repo.repo, - body: process.env.PR_NOTES + body: prNotes }) diff --git a/scripts/check_translations.py b/scripts/check_translations.py new file mode 100644 index 000000000..72545fe08 --- /dev/null +++ b/scripts/check_translations.py @@ -0,0 +1,231 @@ +""" +Translation Tag Checker for OWASP Cornucopia + +This script checks that translation files have the same T0xxx tags as the English version. 
class TranslationChecker:
    """Check translations for missing, untranslated, or empty tags.

    Card files in ``source_dir`` are grouped by base name (file stem without
    the trailing language code). Within each group the ``en`` file is the
    reference; every other file is compared against its ``T0...`` tags.
    """

    # Display names used in the Markdown report, keyed by lower-case,
    # hyphen-separated language code.
    _LANG_NAMES = {
        'es': 'Spanish',
        'fr': 'French',
        'hu': 'Hungarian',
        'it': 'Italian',
        'nl': 'Dutch',
        'no-nb': 'Norwegian',
        'pt-br': 'Portuguese (Brazil)',
        'pt-pt': 'Portuguese (Portugal)',
        'ru': 'Russian'
    }

    # (issue key, heading, explanation) in the order the report prints them.
    _REPORT_SECTIONS = (
        (
            'missing',
            "### Missing Tags\n",
            "The following tags are present in the English version but missing in this translation:\n",
        ),
        (
            'untranslated',
            "### Untranslated Tags\n",
            "The following tags have identical text to English (not translated):\n",
        ),
        (
            'empty',
            "### Empty Tags\n",
            "The following tags are empty:\n",
        ),
    )

    def __init__(self, source_dir: Path):
        """Create a checker for the YAML files directly inside *source_dir*."""
        self.source_dir = source_dir
        # base name -> language code -> issue record (filled by
        # check_translations, read by generate_markdown_report).
        self.results: Dict[str, Dict[str, dict]] = defaultdict(lambda: defaultdict(dict))

    @staticmethod
    def _split_stem(stem: str):
        """Split a file stem into ``(base_name, language_code)``.

        Expected stem format: ``{edition}-{component}-{version}-{lang}``.
        ``lang`` is either a two-character code (``es``) or a region-qualified
        code. Region-qualified codes are recognised in hyphenated form
        (``pt-br``, as used by the report's language table) and in underscore
        form (``pt_br``).
        NOTE(review): confirm which separator the real source files use.

        Returns ``None`` when the stem has fewer than three ``-`` separated
        parts or does not end in a recognisable language code.
        """
        parts = stem.split('-')
        if len(parts) < 3:
            return None

        last = parts[-1]
        if len(last) == 2:
            # "…-pt-br": the segment before a two-character tail is itself a
            # two-letter word, so both together form the language code. The
            # segment before a plain code is the version (contains digits),
            # so it never matches this test.
            if len(parts) >= 4 and len(parts[-2]) == 2 and parts[-2].isalpha():
                return '-'.join(parts[:-2]), f"{parts[-2]}-{last}"
            return '-'.join(parts[:-1]), last

        primary, sep, region = last.partition('_')
        if sep and len(primary) == 2 and len(region) == 2:
            return '-'.join(parts[:-1]), last

        return None

    def extract_tags(self, yaml_file: Path) -> Dict[str, str]:
        """Extract T0xxx tags and their text from a YAML file.

        Reads ``paragraphs[*].sentences[*]`` and keeps every sentence whose
        ``id`` is a string starting with ``T0``, mapped to its ``text``
        (``''`` when the key is absent).

        Read and parse errors are reported on stderr and yield an empty
        mapping. Entries that do not have the expected shape are skipped, so
        one malformed sentence cannot hide the tags that follow it.
        """
        tags: Dict[str, str] = {}
        try:
            with open(yaml_file, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except (OSError, ValueError, yaml.YAMLError) as e:
            # ValueError covers UnicodeDecodeError for mis-encoded files.
            print(f"Error reading {yaml_file}: {e}", file=sys.stderr)
            return tags

        if not isinstance(data, dict):
            return tags
        paragraphs = data.get('paragraphs')
        if not isinstance(paragraphs, list):
            return tags

        for paragraph in paragraphs:
            if not isinstance(paragraph, dict):
                continue
            sentences = paragraph.get('sentences')
            if not isinstance(sentences, list):
                continue
            for sentence in sentences:
                if not isinstance(sentence, dict):
                    continue
                tag_id = sentence.get('id', '')
                if isinstance(tag_id, str) and tag_id.startswith('T0'):
                    tags[tag_id] = sentence.get('text', '')

        return tags

    def get_file_groups(self) -> Dict[str, List[Path]]:
        """Group card YAML files by their base name (e.g., webapp-cards-2.2).

        Only files whose base name contains ``cards`` and whose stem ends in
        a language code are grouped. Files with ``archive`` in their file name
        are skipped.
        """
        file_groups = defaultdict(list)

        # sorted() makes group and file order independent of the filesystem.
        for yaml_file in sorted(self.source_dir.glob('*-*.yaml')):
            # Test the file name only: matching the whole path would skip
            # everything when the checkout lives under an "archive" directory.
            if 'archive' in yaml_file.name:
                continue

            split = self._split_stem(yaml_file.stem)
            if split is None:
                continue

            base_name, _lang = split
            if 'cards' in base_name:
                file_groups[base_name].append(yaml_file)

        return file_groups

    def check_translations(self) -> Dict[str, Dict[str, Dict[str, List[str]]]]:
        """
        Check all translation files against English versions.

        Groups without an English file are reported on stderr and skipped;
        groups whose English file yields no tags are skipped silently.

        Returns:
            Dict with structure:
            {
                'base_name': {
                    'language': {
                        'missing': ['T00145', ...],
                        'untranslated': ['T00100', ...],
                        'empty': ['T00200', ...],
                        'file': 'name-of-translation-file.yaml'
                    }
                }
            }
            Only languages with at least one issue are present.
        """
        for base_name, files in self.get_file_groups().items():
            english_file = None
            translations = []  # (language code, path) pairs

            for path in files:
                # Every grouped file already passed _split_stem.
                _base, lang = self._split_stem(path.stem)
                if lang == 'en':
                    english_file = path
                else:
                    translations.append((lang, path))

            if english_file is None:
                print(f"Warning: No English file found for {base_name}", file=sys.stderr)
                continue

            english_tags = self.extract_tags(english_file)
            if not english_tags:
                continue

            for lang, trans_file in translations:
                trans_tags = self.extract_tags(trans_file)

                missing = []
                untranslated = []
                empty = []

                for tag_id, eng_text in english_tags.items():
                    if tag_id not in trans_tags:
                        missing.append(tag_id)
                    elif not trans_tags[tag_id]:
                        empty.append(tag_id)
                    elif trans_tags[tag_id] == eng_text:
                        untranslated.append(tag_id)

                if missing or untranslated or empty:
                    self.results[base_name][lang] = {
                        'missing': sorted(missing),
                        'untranslated': sorted(untranslated),
                        'empty': sorted(empty),
                        'file': str(trans_file.name)
                    }

        return dict(self.results)

    def generate_markdown_report(self) -> str:
        """Generate a Markdown report of the issues stored in ``self.results``.

        Returns a short "all completed" report when no issues are recorded.
        """
        report_lines = ["# Translation Check Report\n"]

        if not self.results:
            report_lines.append("✅ All existing translations have been completed.\n")
            return '\n'.join(report_lines)

        report_lines.append("The following sentences/tags have issues in the translations:\n")

        for base_name in sorted(self.results.keys()):
            languages = self.results[base_name]

            for lang in sorted(languages.keys()):
                # Normalise so "pt_br" and "PT-BR" both find "pt-br".
                lang_key = lang.lower().replace('_', '-')
                lang_name = self._LANG_NAMES.get(lang_key, lang)
                issues = languages[lang]
                filename = issues.get('file', '')

                report_lines.append(f"\n## {lang_name}\n")
                report_lines.append(f"**File:** `{filename}`\n")

                for key, heading, explanation in self._REPORT_SECTIONS:
                    if issues[key]:
                        report_lines.append(heading)
                        report_lines.append(explanation)
                        tags_str = ', '.join(issues[key])
                        report_lines.append(f"{tags_str}\n")

        return '\n'.join(report_lines)
class TestTranslationTagsIntegration(unittest.TestCase):
    """Integration tests that check actual translation files."""

    def setUp(self):
        """Locate the repository root and build a checker for ``source/``."""
        # Find the root by looking upwards for scripts/check_translations.py
        # instead of counting parents: this file sits in scripts/, so three
        # levels up is outside the repository. Falls back to the parent of
        # this file's directory if nothing matches.
        here = Path(__file__).resolve()
        self.base_path = next(
            (
                parent for parent in here.parents
                if (parent / 'scripts' / 'check_translations.py').is_file()
            ),
            here.parent.parent,
        )
        self.source_dir = self.base_path / 'source'
        self.checker = TranslationChecker(self.source_dir)

    def test_source_directory_exists(self):
        """Test that the source directory exists."""
        self.assertTrue(
            self.source_dir.exists(),
            f"Source directory not found: {self.source_dir}"
        )

    def test_english_files_exist(self):
        """Test that English card files exist."""
        english_files = list(self.source_dir.glob('*-cards-*-en.yaml'))
        self.assertGreater(
            len(english_files), 0,
            "No English card files found in source directory"
        )

    def test_translations_completeness(self):
        """
        Test that all translations have the same T0xxx tags as English.

        This test will fail if:
        - Tags are missing in translations
        - Tags are untranslated (identical to English)
        - Tags are empty
        """
        results = self.checker.check_translations()

        if results:
            # Generate detailed report
            report = self.checker.generate_markdown_report()

            # Count total issues across every file group and language
            total_issues = sum(
                len(issues.get(kind, []))
                for languages in results.values()
                for issues in languages.values()
                for kind in ('missing', 'untranslated', 'empty')
            )

            # Real newlines: the doubled backslashes printed a literal "\n".
            self.fail(
                f"\n\nTranslation issues found ({total_issues} total):\n\n{report}\n"
            )

    def test_no_duplicate_tags_in_english(self):
        """Test that English files don't have duplicate T0xxx tags."""
        english_files = list(self.source_dir.glob('*-cards-*-en.yaml'))

        for eng_file in english_files:
            with open(eng_file, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)

            if data and 'paragraphs' in data:
                seen_ids = set()
                duplicates = []

                for paragraph in data['paragraphs']:
                    if 'sentences' in paragraph:
                        for sentence in paragraph['sentences']:
                            tag_id = sentence.get('id', '')
                            if tag_id.startswith('T0'):
                                if tag_id in seen_ids:
                                    duplicates.append(tag_id)
                                seen_ids.add(tag_id)

                self.assertEqual(
                    len(duplicates), 0,
                    f"Duplicate tags found in {eng_file.name}: {duplicates}"
                )

    def test_tag_format(self):
        """Test that tags follow the T0xxxx format."""
        tag_pattern = re.compile(r'^T0\d{4,5}$')

        english_files = list(self.source_dir.glob('*-cards-*-en.yaml'))

        for eng_file in english_files:
            tags = self.checker.extract_tags(eng_file)

            for tag_id in tags:
                self.assertIsNotNone(
                    tag_pattern.match(tag_id),
                    f"Tag {tag_id} in {eng_file.name} doesn't match format T0xxxx"
                )

    def test_generate_markdown_report(self):
        """Test that markdown report generation works."""
        report = self.checker.generate_markdown_report()

        self.assertIsInstance(report, str)
        self.assertIn("Translation Check Report", report)
test_detect_untranslated_tags(self): + """Test detection of untranslated tags (identical to English).""" + results = self.checker.check_translations() + + # Spanish file has T00002 identical to English + self.assertIn('test-cards-1.0', results) + self.assertIn('es', results['test-cards-1.0']) + self.assertIn('T00002', results['test-cards-1.0']['es']['untranslated']) + + def test_detect_empty_tags(self): + """Test detection of empty tag values.""" + results = self.checker.check_translations() + + # Spanish file has T00003 empty + self.assertIn('test-cards-1.0', results) + self.assertIn('es', results['test-cards-1.0']) + self.assertIn('T00003', results['test-cards-1.0']['es']['empty']) + + def test_generate_report_with_issues(self): + """Test markdown report generation when issues exist.""" + self.checker.check_translations() + report = self.checker.generate_markdown_report() + + self.assertIn('Translation Check Report', report) + self.assertIn('Spanish', report) + self.assertIn('Missing Tags', report) + self.assertIn('Untranslated Tags', report) + self.assertIn('Empty Tags', report) + + def test_tag_format_validation(self): + """Test that tags follow the T0xxxx format.""" + tag_pattern = re.compile(r'^T0\d{4,5}$') + + english_file = self.test_source_dir / 'test-cards-1.0-en.yaml' + tags = self.checker.extract_tags(english_file) + + for tag_id in tags.keys(): + self.assertIsNotNone( + tag_pattern.match(tag_id), + f"Tag {tag_id} doesn't match format T0xxxx" + ) + + def test_no_duplicate_tags(self): + """Test that files don't have duplicate T0xxx tags.""" + english_file = self.test_source_dir / 'test-cards-1.0-en.yaml' + + with open(english_file, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + if data and 'paragraphs' in data: + seen_ids = set() + duplicates = [] + + for paragraph in data['paragraphs']: + if 'sentences' in paragraph: + for sentence in paragraph['sentences']: + tag_id = sentence.get('id', '') + if tag_id.startswith('T0'): + if tag_id in 
seen_ids: + duplicates.append(tag_id) + seen_ids.add(tag_id) + + self.assertEqual( + len(duplicates), 0, + f"Duplicate tags found: {duplicates}" + ) + + def test_file_groups(self): + """Test that files are correctly grouped by base name.""" + file_groups = self.checker.get_file_groups() + + self.assertIn('test-cards-1.0', file_groups) + files = [f.name for f in file_groups['test-cards-1.0']] + self.assertIn('test-cards-1.0-en.yaml', files) + self.assertIn('test-cards-1.0-es.yaml', files) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_files/source/test-cards-1.0-en.yaml b/tests/test_files/source/test-cards-1.0-en.yaml new file mode 100644 index 000000000..f9b00b439 --- /dev/null +++ b/tests/test_files/source/test-cards-1.0-en.yaml @@ -0,0 +1,32 @@ +--- +meta: + edition: "test" + component: "cards" + language: "EN" + version: "1.0" +suits: +- + id: "TS" + name: "Test Suit" + cards: + - + id: "TSA" + value: "A" + desc: "Test card A" +paragraphs: +- + id: "Common" + name: "Common" + sentences: + - + id: "T00001" + text: "This is the first test tag" + - + id: "T00002" + text: "This is the second test tag" + - + id: "T00003" + text: "This is the third test tag" + - + id: "T00004" + text: "This is the fourth test tag" diff --git a/tests/test_files/source/test-cards-1.0-es.yaml b/tests/test_files/source/test-cards-1.0-es.yaml new file mode 100644 index 000000000..363c9e8c0 --- /dev/null +++ b/tests/test_files/source/test-cards-1.0-es.yaml @@ -0,0 +1,29 @@ +--- +meta: + edition: "test" + component: "cards" + language: "ES" + version: "1.0" +suits: +- + id: "TS" + name: "Test Suit Spanish" + cards: + - + id: "TSA" + value: "A" + desc: "Tarjeta de prueba A" +paragraphs: +- + id: "Common" + name: "Common" + sentences: + - + id: "T00001" + text: "Esta es la primera etiqueta de prueba" + - + id: "T00002" + text: "This is the second test tag" + - + id: "T00003" + text: ""