-
Notifications
You must be signed in to change notification settings - Fork 21
Expand file tree
/
Copy pathpyproject.toml
More file actions
186 lines (164 loc) · 4.93 KB
/
pyproject.toml
File metadata and controls
186 lines (164 loc) · 4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
[build-system]
# `license = "Apache-2.0"` in [project] is a PEP 639 SPDX license expression;
# setuptools only understands that form from 77.0.3 onward. With the old
# floor of 68.0 a resolver may pick setuptools 68-76, which rejects a
# bare-string `license` and fails the build. `wheel` is intentionally not
# listed: setuptools provisions it itself, per current PyPA guidance.
requires = ["setuptools>=77.0.3"]
build-backend = "setuptools.build_meta"
[project]
name = "evalview"
version = "0.6.2"
description = "Open-source testing and regression detection framework for AI agents. Golden baseline diffing, CI/CD integration, works with LangGraph, CrewAI, OpenAI, Anthropic Claude, HuggingFace, Ollama, and MCP."
readme = "README.md"
license = "Apache-2.0"
authors = [{name = "EvalView Team", email = "hidai@evalview.com"}]
requires-python = ">=3.9"

# PyPI search terms: core topic first, then framework/provider integrations,
# then testing-methodology vocabulary.
keywords = [
    "ai", "agents", "testing", "evaluation", "llm", "langchain", "langgraph", "crewai", "openai",
    "anthropic", "claude", "huggingface", "ollama", "mcp",
    "multi-agent", "pytest-ai", "ai-agent-testing", "llm-testing", "agent-evaluation",
    "regression-testing", "golden-baseline", "ci-cd-testing", "yaml-testing", "tool-calling",
    "agent-regression", "llm-regression", "skill-testing", "mcp-contract-testing",
    "pass-at-k", "non-deterministic-testing", "ai-ci-cd",
]

classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Software Development :: Testing",
    "Topic :: Software Development :: Quality Assurance",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]

# Runtime requirements, sorted alphabetically so diffs stay minimal.
dependencies = [
    "anthropic>=0.39.0",
    "click>=8.1.0",
    "httpx>=0.26.0",
    "jinja2>=3.0",
    "jsonschema>=4.0.0",
    "openai>=1.12.0",
    "prompt_toolkit>=3.0.0",
    "pydantic>=2.5.0",
    "python-dateutil>=2.8.2",
    "python-dotenv>=1.0.0",
    "pyyaml>=6.0",
    "rich>=13.7.0",
]
[project.optional-dependencies]
# Interactive charts layered on top of the built-in HTML report.
reports = [
    "plotly>=5.0",
]
# File-watching for watch-mode development.
watch = [
    "watchdog>=3.0",
]
# Telemetry (opt-out anonymous usage analytics).
telemetry = [
    "posthog>=3.0.0",
]
# Cohere adapter.
cohere = [
    "cohere>=5.0.0",
]
# Mistral adapter.
mistral = [
    "mistralai>=1.0.0",
]
# All optional features. A self-referencing extra (supported by pip >= 21.2
# and current setuptools) re-uses the lists above, so `all` can never drift
# out of sync with the individual extras.
all = [
    "evalview[cohere,mistral,reports,telemetry,watch]",
]
# Development dependencies, sorted alphabetically.
dev = [
    "anthropic>=0.39.0",
    "black==24.10.0",
    "fastapi>=0.109.0",  # mock agent in dogfood tests
    "jinja2>=3.0",
    "mypy>=1.7.0",
    "plotly>=5.0",
    "pytest>=7.4.0",
    "pytest-asyncio>=0.21.0",
    "pytest-cov>=4.1.0",
    "ruff>=0.1.0",
    "types-PyYAML>=6.0",
    "uvicorn>=0.27.0",  # mock agent in dogfood tests
    "watchdog>=3.0",
]
# Console entry point: `evalview` on PATH dispatches to the click CLI.
[project.scripts]
evalview = "evalview.cli:main"

# Auto-registers the pytest plugin under pytest's `pytest11` group.
# (`pytest11` is a bare-key-safe name, so no quoting is needed.)
[project.entry-points.pytest11]
evalview = "evalview.pytest_plugin"

[project.urls]
Homepage = "https://evalview.com"
Documentation = "https://github.com/hidai25/eval-view#readme"
Repository = "https://github.com/hidai25/eval-view.git"
"Issue Tracker" = "https://github.com/hidai25/eval-view/issues"
Changelog = "https://github.com/hidai25/eval-view/blob/main/CHANGELOG.md"
# Ship every `evalview*` package found at the repository root.
[tool.setuptools.packages.find]
where = ["."]
include = ["evalview*"]

# Non-Python files that must ship inside the installed package.
# The canary YAMLs are loaded at runtime by `evalview model-check` and
# break the command if they are missing from a wheel build.
[tool.setuptools.package-data]
"evalview.benchmarks.canary" = ["*.yaml", "*.md"]
"evalview.templates.patterns" = ["*.yaml"]
# black and ruff share line-length = 100 so the formatter and linter
# never disagree about wrapping.
[tool.black]
line-length = 100
target-version = ["py39"]

[tool.ruff]
line-length = 100
target-version = "py39"

# Gradual-typing posture: untyped defs are permitted (but their bodies are
# still checked) and missing third-party stubs are tolerated globally.
[tool.mypy]
python_version = "3.9"
warn_return_any = false
warn_unused_configs = true
ignore_missing_imports = true
disallow_untyped_defs = false
check_untyped_defs = true
# Modules excluded from type checking entirely, in one override: mypy
# accepts a list of module patterns per override, which replaces nine
# duplicated single-module sections with identical settings.
# - evalview.cli and evalview.commands.run.*: click decorators that are
#   hard to type.
# - evalview.pytest_plugin: imports pytest internals whose stubs use match
#   statements (Python 3.10+), which mypy rejects under
#   python_version = "3.9".
[[tool.mypy.overrides]]
module = [
    "evalview.adapters.*",
    "evalview.cli",
    "evalview.commands.run.*",
    "evalview.core.loader",
    "evalview.evaluators.*",
    "evalview.pytest_plugin",
    "evalview.recorder",
    "evalview.reporters.*",
    "evalview.telemetry.*",
    "evalview.tracking.*",
]
ignore_errors = true

[[tool.mypy.overrides]]
module = "yaml"
ignore_missing_imports = true

# _pytest internals use match statements (Python 3.10+). mypy can't parse
# them under python_version = "3.9" — skip following imports into them entirely.
[[tool.mypy.overrides]]
module = "_pytest.*"
follow_imports = "skip"