diff --git a/CLAUDE.md b/CLAUDE.md
index 15b2947..6ff47a2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -16,6 +16,10 @@ uv pip install -e . --group dev
 ### Testing
 
 ```bash
+# IMPORTANT: Run all tests across Python versions
+# to make sure all code changes work on older Python versions
+uv run tox -p auto
+
 # Run all tests with coverage
 uv run pytest
@@ -27,9 +31,6 @@ uv run pytest tests/frequentist/test_z_test.py
 
 # Run specific test
 uv run pytest tests/frequentist/test_z_test.py::test_name
-
-# Run all tests across Python versions
-uv run tox
 ```
 
 ### Code Quality
@@ -40,8 +41,11 @@ uv run ruff check
 # Run formatting
 uv run ruff format
 
+# Run type checking
+uv run ty check
+
 # Run all quality checks (as done in CI)
-uv run ruff check && uv run ruff format && uv run pytest
+uv run ruff check && uv run ruff format --check && uv run ty check && uv run pytest
 ```
 
 ### Build
@@ -148,4 +152,6 @@ The project uses `tox-uv` to leverage uv's fast package installation and environ
 
 ## Code Style
 
-Uses ruff linting and formatting.
+- **Linting & Formatting**: Uses [ruff](https://github.com/astral-sh/ruff) for code linting and formatting
+- **Type Checking**: Uses [ty](https://github.com/astral-sh/ty) (Astral's fast Python type checker) for static type analysis
+- Both tools are configured in `pyproject.toml` and exclude the `examples/` directory
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 679b470..43c4a4e 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -91,6 +91,7 @@ Ready to contribute? Here's how to set up `confidence` for local development.
 
     $ uv run ruff format  # Format code
    $ uv run ruff check  # Lint code
+    $ uv run ty check  # Type check code
    $ uv run pytest  # Run tests
 
 To test across all supported Python versions (3.9, 3.10, 3.11, 3.12)::
diff --git a/pyproject.toml b/pyproject.toml
index 25cd3e9..2840a40 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ dev = [
     "pytest-cov>=4.0.0",
     "pytest-xdist>=3.0.2",
     "coverage>=7.0.0",
+    "ty>=0.0.11",
 ]
 
 [project.urls]
@@ -53,6 +54,12 @@ extend-exclude = ["examples/"]
 [tool.ruff.format]
 quote-style = "double"
 
+[tool.ty.src]
+exclude = ["examples/"]
+
+[tool.ty.rules]
+unused-ignore-comment = "ignore"
+
 [tool.pytest.ini_options]
 addopts = "-v -n auto --cov=spotify_confidence --cov-report=html --cov-report=xml --cov-report=term-missing"
 testpaths = ["tests"]
diff --git a/spotify_confidence/analysis/abstract_base_classes/confidence_computer_abc.py b/spotify_confidence/analysis/abstract_base_classes/confidence_computer_abc.py
index 541ea58..af87d3a 100644
--- a/spotify_confidence/analysis/abstract_base_classes/confidence_computer_abc.py
+++ b/spotify_confidence/analysis/abstract_base_classes/confidence_computer_abc.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from typing import Iterable, List, Tuple, Union
+from typing import Iterable, List, Optional, Tuple, Union
 
 from pandas import DataFrame
 
@@ -32,11 +32,11 @@ def compute_difference(
         level_1: Union[str, Iterable],
         level_2: Union[str, Iterable],
         absolute: bool,
-        groupby: Union[str, Iterable],
-        nims: NIM_TYPE,
-        final_expected_sample_size_column: str,
+        groupby: Optional[Union[str, Iterable]],
+        nims: Optional[NIM_TYPE],
+        final_expected_sample_size_column: Optional[str],
         verbose: bool,
-        mde_column: str,
+        mde_column: Optional[str],
     ) -> DataFrame:
         """Return dataframe containing the difference in means between group 1 and 2,
         p-value and confidence interval
@@ -46,42 +46,44 @@ def compute_multiple_difference(
         self,
-        level: Union[str, Iterable],
+        level: Union[str, Iterable, int],
         absolute: bool,
-        groupby: Union[str, Iterable],
-        level_as_reference: bool,
-        nims: NIM_TYPE,
-        final_expected_sample_size_column: str,
+        groupby: Optional[Union[str, Iterable]],
+        level_as_reference: Optional[bool],
+        nims: Optional[NIM_TYPE],
+        final_expected_sample_size_column: Optional[str],
         verbose: bool,
-        mde_column: str,
+        mde_column: Optional[str],
     ) -> DataFrame:
         """Return dataframe containing the difference in means between level
         and all other groups, with p-value and confidence interval
         """
         pass
 
+    @abstractmethod
     def compute_differences(
         self,
-        levels: List[Tuple],
+        levels: Union[Tuple, List[Tuple]],
         absolute: bool,
-        groupby: Union[str, Iterable],
-        nims: NIM_TYPE,
-        final_expected_sample_size_column: str,
+        groupby: Optional[Union[str, Iterable]],
+        nims: Optional[NIM_TYPE],
+        final_expected_sample_size_column: Optional[str],
         verbose: bool,
-        mde_column: str,
+        mde_column: Optional[str],
     ) -> DataFrame:
         """Return dataframe containing the difference in means between level
         and all other groups, with p-value and confidence interval
         """
         pass
 
+    @abstractmethod
     def achieved_power(
         self,
         level_1: Union[str, Iterable],
         level_2: Union[str, Iterable],
         mde: float,
         alpha: float,
-        groupby: Union[str, Iterable],
+        groupby: Optional[Union[str, Iterable]],
     ) -> DataFrame:
         """Calculated the achieved power of test of differences between
         level 1 and level 2 given a targeted MDE.
diff --git a/spotify_confidence/analysis/abstract_base_classes/confidence_grapher_abc.py b/spotify_confidence/analysis/abstract_base_classes/confidence_grapher_abc.py
index 73f0393..f6ef712 100644
--- a/spotify_confidence/analysis/abstract_base_classes/confidence_grapher_abc.py
+++ b/spotify_confidence/analysis/abstract_base_classes/confidence_grapher_abc.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from typing import Iterable, Union
+from typing import Iterable, Optional, Union
 
 from pandas import DataFrame
 
@@ -29,13 +29,13 @@ def __init__(
         self,
         data_frame: DataFrame,
         numerator_column: str,
         denominator_column: str,
-        categorical_group_columns: str,
-        ordinal_group_column: str,
+        categorical_group_columns: Union[str, Iterable],
+        ordinal_group_column: Optional[str],
     ):
         pass
 
     @abstractmethod
-    def plot_summary(self, summary_df: DataFrame, groupby: Union[str, Iterable]) -> ChartGrid:
+    def plot_summary(self, summary_df: DataFrame, groupby: Optional[Union[str, Iterable]]) -> ChartGrid:
         """Plot for each group in the data_frame:
 
         if ordinal level exists:
@@ -57,8 +57,8 @@ def plot_difference(
         self,
         difference_df: DataFrame,
         absolute: bool,
-        groupby: Union[str, Iterable],
-        nims: NIM_TYPE,
+        groupby: Optional[Union[str, Iterable]],
+        nims: Optional[NIM_TYPE],
         use_adjusted_intervals: bool,
         split_plot_by_groups: bool,
     ) -> ChartGrid:
@@ -79,8 +79,8 @@ def plot_differences(
         self,
         difference_df: DataFrame,
         absolute: bool,
-        groupby: Union[str, Iterable],
-        nims: NIM_TYPE,
+        groupby: Optional[Union[str, Iterable]],
+        nims: Optional[NIM_TYPE],
         use_adjusted_intervals: bool,
         split_plot_by_groups: bool,
     ) -> ChartGrid:
@@ -101,9 +101,9 @@ def plot_multiple_difference(
         self,
         difference_df: DataFrame,
         absolute: bool,
-        groupby: Union[str, Iterable],
-        level_as_reference: bool,
-        nims: NIM_TYPE,
+        groupby: Optional[Union[str, Iterable]],
+        level_as_reference: Optional[bool],
+        nims: Optional[NIM_TYPE],
         use_adjusted_intervals: bool,
         split_plot_by_groups: bool,
     ) -> ChartGrid:
diff --git a/spotify_confidence/analysis/bayesian/bayesian_base.py b/spotify_confidence/analysis/bayesian/bayesian_base.py
index 6177cbc..70f9fc1 100644
--- a/spotify_confidence/analysis/bayesian/bayesian_base.py
+++ b/spotify_confidence/analysis/bayesian/bayesian_base.py
@@ -27,7 +27,7 @@
 # warnings.simplefilter("once")
 
-INITIAL_RANDOMIZATION_SEED = np.random.get_state()[1][0]
+INITIAL_RANDOMIZATION_SEED = np.random.get_state()[1][0]  # type: ignore[index]
 
 
 def num_decimals(value: float, absolute: bool) -> int:
@@ -114,6 +114,11 @@ def __init__(
         self._all_group_columns = [column for column in self._all_group_columns if column is not None]
         self._validate_data()
 
+    @abstractmethod
+    def _interval(self, row):
+        """Return confidence/credible interval for a row. Must be implemented by subclasses."""
+        pass
+
     def _validate_data(self):
         """Integrity check input dataframe."""
         if not self._all_group_columns:
@@ -137,36 +142,6 @@ def _validate_data(self):
                 Must be number or datetime type.""".format(ordinal_column_type)
             )
 
-    @classmethod
-    def as_cumulative(
-        cls, data_frame, numerator_column, denominator_column, ordinal_group_column, categorical_group_columns=None
-    ):
-        """
-        Instantiate the class with a cumulative representation of the dataframe.
-        Sorts by the ordinal variable and calculates the cumulative sum
-        May be used for to visualize the difference between groups as a
-        time series.
-
-        Args:
-            data_frame (pd.DataFrame): DataFrame
-            numerator_column (str): Column name for numerator column.
-            denominator_column (str): Column name for denominator column.
-            ordinal_group_column (str): Column name for ordinal grouping
-                (e.g. numeric or date values).
-            categorical_group_columns (str or list),
-                Optional: Column names for categorical groupings.
-
-        """
-
-        sorted_df = data_frame.sort_values(by=ordinal_group_column)
-        cumsum_cols = [numerator_column, denominator_column]
-        if categorical_group_columns:
-            sorted_df[cumsum_cols] = sorted_df.groupby(by=categorical_group_columns)[cumsum_cols].cumsum()
-        else:
-            sorted_df[cumsum_cols] = sorted_df[cumsum_cols].cumsum()
-
-        return cls(sorted_df, numerator_column, denominator_column, categorical_group_columns, ordinal_group_column)
-
     def summary(self):
         """Return Pandas DataFrame with summary statistics."""
         return self._summary(self._data_frame, self._interval)
@@ -490,13 +465,6 @@ def _iterate_groupby_to_dataframe(self, input_function, groupby, **kwargs):
 
         return results_data_frame
 
-    def _all_groups(self):
-        """Return a list of all group keys.
-
-        Returns: list"""
-        groups = list(self._data_frame.groupby(self._all_group_columns).groups.keys())
-        return groups
-
     def _add_group_by_columns(self, difference_df, groupby, level_name):
         if groupby:
             groupby = groupby[0] if len(groupby) == 1 else groupby
@@ -505,55 +473,3 @@ def _add_group_by_columns(self, difference_df, groupby, level_name):
         else:
             for col, val in zip(groupby, level_name):
                 difference_df.insert(0, column=col, value=val)
-
-
-# class BinomialResponse(BaseTest, metaclass=ABCMeta):
-#     """Binomial Response Variable.
-#     """
-
-# class GaussianResponse(BaseTest, metaclass=ABCMeta):
-#     """Base class for tests of normal response variables
-
-#     E.g. Revenue per user
-#     """
-
-#     pass
-
-
-# class PoissonResponse(BaseTest, metaclass=ABCMeta):
-#     """Base class for tests of poisson response variables.
-
-#     E.g. # of days active per user per month
-#     """
-#     pass
-
-
-# class MultinomialResponse(BaseTest, metaclass=ABCMeta):
-#     """Base class for tests of multinomial response variables.
-
-#     E.g. single choice answer survey
-#     self.
-#     """
-
-#     def __init__(self, data_frame, categorical_group_columns,
-#                  ordinal_group_column, category_column, value_column):
-#         self._category_column = category_column
-#         self._value_column = value_column
-#         super().__init__(data_frame, categorical_group_columns,
-#                          ordinal_group_column)
-
-
-# class CategoricalResponse(BaseTest, metaclass=ABCMeta):
-#     """Base class for tests of categorical response variables.
-
-#     E.g. multiple choice answer survey
-#     """
-
-#     def __init__(self, data_frame, categorical_group_columns,
-#                  ordinal_group_column, category_column, value_column):
-#         self._category_column = category_column
-#         self._value_column = value_column
-#         super().__init__(data_frame, categorical_group_columns,
-#                          ordinal_group_column)
-
-#     pass
diff --git a/spotify_confidence/analysis/bayesian/bayesian_models.py b/spotify_confidence/analysis/bayesian/bayesian_models.py
index 722728f..ad540cf 100644
--- a/spotify_confidence/analysis/bayesian/bayesian_models.py
+++ b/spotify_confidence/analysis/bayesian/bayesian_models.py
@@ -735,38 +735,3 @@ def _categorical_multiple_difference_plot(self, level, absolute, groupby, level_
         )
 
         return results_df
-
-
-# class GammaPoisson(PoissonResponse):
-#     pass
-
-
-# class DirichetMultinomial(MultinomialResponse):
-#     def __init__(self,
-#                  data_frame,
-#                  group_columns,
-#                  category_column,
-#                  value_column,
-#                  prior_value_column=None):
-
-#         super().__init__(data_frame, group_columns, category_column,
-#                          value_column)
-
-
-# class Gaussian(GaussianResponse):
-#     def __init__(self,
-#                  data_frame,
-#                  groupings,
-#                  mean_col,
-#                  std_col,
-#                  n_col,
-#                  time_grouping=None,
-#                  prior_columns=None):
-#         self.prior_lambda_column = prior_lambda_column
-#         super(BaseGaussianResponse, self).__init__(
-#             data_frame, groups, mean_col, std_col, n_col, time_grouping)
-#         raise (NotImplementedError)
-
-
-# class DirichetCategorical(CategoricalResponse):
-#     pass
diff --git a/spotify_confidence/analysis/confidence_utils.py b/spotify_confidence/analysis/confidence_utils.py
index a7545a9..d4a0a80 100644
--- a/spotify_confidence/analysis/confidence_utils.py
+++ b/spotify_confidence/analysis/confidence_utils.py
@@ -14,7 +14,7 @@
 from collections import OrderedDict
 from concurrent.futures.thread import ThreadPoolExecutor
-from typing import Iterable, List, Tuple, Union
+from typing import Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 from pandas import DataFrame, Series, concat
@@ -35,28 +35,19 @@ def groupbyApplyParallel(dfGrouped, func_to_apply):
     return concat(ret_list)
 
 
-def applyParallel(df, func_to_apply, splits=32):
-    with ThreadPoolExecutor(max_workers=splits, thread_name_prefix="applyParallel") as p:
-        ret_list = p.map(
-            func_to_apply,
-            np.array_split(df, min(splits, len(df))),
-        )
-    return concat(ret_list)
-
-
-def get_all_group_columns(categorical_columns: Iterable, additional_column: str) -> Iterable:
+def get_all_group_columns(categorical_columns: Iterable, additional_column: Optional[str]) -> List:
     all_columns = listify(categorical_columns) + listify(additional_column)
     return list(OrderedDict.fromkeys(all_columns))
 
 
-def remove_group_columns(categorical_columns: Iterable, additional_column: str) -> Iterable:
+def remove_group_columns(categorical_columns: Iterable, additional_column: Optional[str]) -> List:
     od = OrderedDict.fromkeys(categorical_columns)
     if additional_column is not None:
         del od[additional_column]
     return list(od)
 
 
-def validate_categorical_columns(categorical_group_columns: Union[str, Iterable]) -> Iterable:
+def validate_categorical_columns(categorical_group_columns: Union[str, Iterable]) -> None:
     if isinstance(categorical_group_columns, str):
         pass
     elif isinstance(categorical_group_columns, Iterable):
@@ -69,28 +60,32 @@ def validate_categorical_columns(categorical_group_columns: Union[str, Iterable]
     )
 
 
-def listify(column_s: Union[str, Iterable]) -> List:
-    if isinstance(column_s, str):
+def listify(column_s: Union[str, Iterable, None]) -> List:
+    if column_s is None:
+        return []
+    elif isinstance(column_s, str):
         return [column_s]
     elif isinstance(column_s, Iterable):
         return list(column_s)
-    elif column_s is None:
+    else:
         return []
 
 
-def get_remaning_groups(all_groups: Iterable, some_groups: Iterable) -> Iterable:
+def get_remaining_groups(all_groups: Union[str, Iterable, None], some_groups: Union[str, Iterable, None]) -> List:
+    if all_groups is None:
+        return []
+    all_groups_list = listify(all_groups)
     if some_groups is None:
-        remaining_groups = all_groups
-    else:
-        remaining_groups = [group for group in all_groups if group not in some_groups and group is not None]
-    return remaining_groups
+        return all_groups_list
+    some_groups_list = listify(some_groups)
+    return [group for group in all_groups_list if group not in some_groups_list and group is not None]
 
 
 def get_all_categorical_group_columns(
     categorical_columns: Union[str, Iterable, None],
     metric_column: Union[str, None],
     treatment_column: Union[str, None],
-) -> Iterable:
+) -> List:
     all_columns = listify(treatment_column) + listify(categorical_columns) + listify(metric_column)
     return list(OrderedDict.fromkeys(all_columns))
@@ -118,7 +113,7 @@ def validate_and_rename_columns(df: DataFrame, columns: Iterable[str]) -> DataFr
         if (df[column + SFX1].isna() == df[column + SFX2].isna()).all() and (
             df[column + SFX1][df[column + SFX1].notna()] == df[column + SFX2][df[column + SFX2].notna()]
         ).all():
-            df = df.rename(columns={column + SFX1: column}).drop(columns=[column + SFX2])
+            df = df.rename(columns={column + SFX1: column}).drop(columns=[column + SFX2])  # type: ignore[union-attr,unused-ignore]
         else:
             raise ValueError(f"Values of {column} do not agree across levels: {df[[column + SFX1, column + SFX2]]}")
     return df
@@ -126,7 +121,7 @@ def validate_and_rename_columns(df: DataFrame, columns: Iterable[str]) -> DataFr
 
 def drop_and_rename_columns(df: DataFrame, columns: Iterable[str]) -> DataFrame:
     columns_dict = {col + SFX1: col for col in columns}
-    return df.rename(columns=columns_dict).drop(columns=[col + SFX2 for col in columns])
+    return df.rename(columns=columns_dict).drop(columns=[col + SFX2 for col in columns])  # type: ignore[union-attr,unused-ignore]
@@ -136,7 +131,9 @@ def level2str(level: Union[str, Tuple]) -> str:
         return ", ".join([str(sub_level) for sub_level in level])
 
 
-def validate_data(df: DataFrame, columns_that_must_exist, group_columns: Iterable, ordinal_group_column: str):
+def validate_data(
+    df: DataFrame, columns_that_must_exist, group_columns: Iterable, ordinal_group_column: Optional[str]
+):
     """Integrity check input dataframe."""
     for col in columns_that_must_exist:
         _validate_column(df, col)
@@ -170,10 +167,12 @@ def _validate_column(df: DataFrame, col: str):
 
 
 def is_non_inferiority(nim) -> bool:
-    if isinstance(nim, float):
+    if nim is None:
+        return False
+    elif isinstance(nim, (int, float)):
         return not np.isnan(nim)
-    elif nim is None:
-        return nim is not None
+    else:
+        return False
 
 
 def reset_named_indices(df):
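The `confidence_utils.py` refactor above is mostly behavior-preserving, but the `None` paths are now explicit (notably, `get_remaining_groups(None, None)` returns `[]` instead of `None`). A quick sanity check of the rewritten helpers — an illustrative snippet, not part of the patch; the column names are made up and the import path is taken from the diff header:

```python
from spotify_confidence.analysis.confidence_utils import (
    get_remaining_groups,
    is_non_inferiority,
    listify,
)

# listify: None now short-circuits to [] via the new first branch
assert listify(None) == []
assert listify("country") == ["country"]
assert listify(("country", "platform")) == ["country", "platform"]

# get_remaining_groups: None is handled for either argument
assert get_remaining_groups(None, "country") == []
assert get_remaining_groups(["country", "platform"], ["country"]) == ["platform"]

# is_non_inferiority: None and non-numeric inputs now return False explicitly
# (the old `return nim is not None` branch was always False for None anyway),
# and ints are accepted alongside floats
assert is_non_inferiority(None) is False
assert is_non_inferiority(1) is True
assert is_non_inferiority(float("nan")) is False
```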
diff --git a/spotify_confidence/analysis/constants.py b/spotify_confidence/analysis/constants.py
index 55c6d1f..15499b2 100644
--- a/spotify_confidence/analysis/constants.py
+++ b/spotify_confidence/analysis/constants.py
@@ -12,7 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Tuple, Union
+from typing import Any, Dict, Optional, Tuple, Union
+
+# TODO: Consider using TypedDict for ConfidenceComputerKwargs to properly type
+# the kwargs passed to confidence computer functions. Currently using Any because
+# kwargs contain heterogeneous types (column names as str, interval_size as float,
+# treatment_weights as List[float], etc.) and TypedDict would be needed to express
+# "when key is DENOMINATOR, value is str; when key is INTERVAL_SIZE, value is float".
 
 NUMERATOR = "numerator"
 NUMERATOR_SUM_OF_SQUARES = "numerator_sum_of_squares"
@@ -43,7 +49,6 @@
 ADJUSTED_P = "adjusted p-value"
 SFX1, SFX2 = "_1", "_2"
 STD_ERR = "std_err"
-Z_CRIT = "z_crit"
 ALPHA = "alpha"
 ADJUSTED_ALPHA = "adjusted_alpha"
 ADJUSTED_ALPHA_POWER_SAMPLE_SIZE = "adjusted_alpha_power_sample_size"
@@ -142,10 +147,6 @@
     SPOT_1_FDR_TSBKY,
 ]
 
-CORRECTION_METHODS_THAT_DONT_REQUIRE_METRIC_INFO = list(
-    set(CORRECTION_METHODS) - set(CORRECTION_METHODS_THAT_REQUIRE_METRIC_INFO)
-)
-
 NULL_HYPOTHESIS = "null_hypothesis"
 ALTERNATIVE_HYPOTHESIS = "alternative_hypothesis"
 NIM = "non-inferiority margin"
@@ -157,7 +158,7 @@
 PREFERENCE = "preference"
 PREFERENCE_TEST = "preference_used_in_test"
 PREFERENCE_DICT = {"smaller": DECREASE_PREFFERED, "larger": INCREASE_PREFFERED, TWO_SIDED: TWO_SIDED}
-NIM_TYPE = Union[Tuple[float, str], Dict[str, Tuple[float, str]], bool]
+NIM_TYPE = Union[Tuple[Optional[float], Optional[str]], Dict[Any, Tuple[Optional[float], Optional[str]]], bool]
 METHOD_COLUMN_NAME = "method_column_name"
 CHI2 = "chi-squared"
 TTEST = "t-test"
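For reference, a minimal sketch of the `TypedDict` that the TODO above describes — illustrative only, nothing like this is added by the patch; the class name comes from the TODO, and the keys mirror the string *values* of the constants in this module (e.g. `NUMERATOR = "numerator"`), since `TypedDict` keys must be literal strings:

```python
from typing import List, Optional, TypedDict


class ConfidenceComputerKwargs(TypedDict, total=False):
    # Hypothetical key set; the real kwargs carry many more entries.
    numerator: str
    denominator: str
    interval_size: float
    treatment_weights: List[float]
    final_expected_sample_size: Optional[str]
```

With `total=False` every key is optional, which matches how these kwargs are assembled piecemeal; the trade-off noted in the TODO is that each call site would then type-check against literal keys rather than `Any`.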
diff --git a/spotify_confidence/analysis/frequentist/chartify_grapher.py b/spotify_confidence/analysis/frequentist/chartify_grapher.py
index ead6717..933a868 100644
--- a/spotify_confidence/analysis/frequentist/chartify_grapher.py
+++ b/spotify_confidence/analysis/frequentist/chartify_grapher.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Iterable, Tuple, Union
+from typing import Hashable, Iterable, Optional, Union
 
 import numpy as np
 from bokeh.models import tools
@@ -26,7 +26,7 @@
     axis_format_precision,
     de_list_if_length_one,
     get_all_group_columns,
-    get_remaning_groups,
+    get_remaining_groups,
     level2str,
     listify,
     to_finite,
@@ -53,10 +53,10 @@ class ChartifyGrapher(ConfidenceGrapherABC):
     def __init__(
         self,
         data_frame: DataFrame,
-        numerator_column: str,
-        denominator_column: str,
-        categorical_group_columns: str,
-        ordinal_group_column: str,
+        numerator_column: Optional[str],
+        denominator_column: Optional[str],
+        categorical_group_columns: Union[str, Iterable],
+        ordinal_group_column: Optional[str],
     ):
         self._df = data_frame
         self._numerator = numerator_column
@@ -65,7 +65,7 @@ def __init__(
         self._ordinal_group_column = ordinal_group_column
         self._all_group_columns = get_all_group_columns(self._categorical_group_columns, self._ordinal_group_column)
 
-    def plot_summary(self, summary_df: DataFrame, groupby: Union[str, Iterable]) -> ChartGrid:
+    def plot_summary(self, summary_df: DataFrame, groupby: Optional[Union[str, Iterable]]) -> ChartGrid:
         ch = ChartGrid()
         if groupby is None:
             ch.charts.append(self._summary_plot(level_name=None, level_df=summary_df, groupby=groupby))
@@ -78,13 +78,13 @@ def plot_difference(
         self,
         difference_df,
         absolute,
-        groupby,
-        nims: NIM_TYPE,
+        groupby: Optional[Union[str, Iterable]],
+        nims: Optional[NIM_TYPE],
         use_adjusted_intervals: bool,
         split_plot_by_groups: bool,
     ) -> ChartGrid:
         ch = ChartGrid()
-        categorical_groups = get_remaning_groups(listify(groupby), self._ordinal_group_column)
+        categorical_groups = get_remaining_groups(listify(groupby), self._ordinal_group_column)
 
         if len(categorical_groups) == 0 or not split_plot_by_groups:
             ch.charts += self.plot_differece_group(absolute, difference_df, groupby, use_adjusted_intervals).charts
@@ -105,13 +105,13 @@ def plot_differences(
         self,
         difference_df,
         absolute,
-        groupby,
-        nims: NIM_TYPE,
+        groupby: Optional[Union[str, Iterable]],
+        nims: Optional[NIM_TYPE],
         use_adjusted_intervals: bool,
         split_plot_by_groups: bool,
     ) -> ChartGrid:
         ch = ChartGrid()
-        categorical_groups = get_remaning_groups(listify(groupby), self._ordinal_group_column)
+        categorical_groups = get_remaining_groups(listify(groupby), self._ordinal_group_column)
 
         if len(categorical_groups) == 0 or not split_plot_by_groups:
             ch.charts += self.plot_differences_group(absolute, difference_df, groupby, use_adjusted_intervals).charts
@@ -121,7 +121,7 @@ def plot_differences(
         return ch
 
     def plot_differences_group(self, absolute, difference_df, groupby, use_adjusted_intervals):
-        categorical_groups = get_remaning_groups(groupby, self._ordinal_group_column)
+        categorical_groups = get_remaining_groups(groupby, self._ordinal_group_column)
         groupby_columns = self._add_level_columns(categorical_groups)
         if self._ordinal_group_column in listify(groupby):
             ch = self._ordinal_difference_plot(difference_df, absolute, groupby_columns, use_adjusted_intervals)
@@ -136,14 +136,14 @@ def plot_multiple_difference(
         self,
         difference_df,
         absolute,
-        groupby,
-        level_as_reference,
-        nims: NIM_TYPE,
+        groupby: Optional[Union[str, Iterable]],
+        level_as_reference: Optional[bool],
+        nims: Optional[NIM_TYPE],
         use_adjusted_intervals: bool,
         split_plot_by_groups: bool,
     ) -> ChartGrid:
         ch = ChartGrid()
-        categorical_groups = get_remaning_groups(listify(groupby), self._ordinal_group_column)
+        categorical_groups = get_remaining_groups(listify(groupby), self._ordinal_group_column)
         groupby = de_list_if_length_one(groupby)
@@ -175,7 +175,7 @@ def plot_multiple_difference_group(
     def _ordinal_difference_plot(
         self, difference_df: DataFrame, absolute: bool, groupby: Union[str, Iterable], use_adjusted_intervals: bool
     ) -> Chart:
-        remaining_groups = get_remaning_groups(groupby, self._ordinal_group_column)
+        remaining_groups = get_remaining_groups(groupby, self._ordinal_group_column)
 
         if "level_1" in groupby and "level_2" in groupby:
             title = "Change from level_1 to level_2"
@@ -238,10 +238,14 @@ def _categorical_difference_chart(
         axis_format, y_min, y_max = axis_format_precision(
             numbers=concat(
                 [
-                    difference_df[LOWER],
-                    difference_df[DIFFERENCE],
-                    difference_df[UPPER],
-                    difference_df[NULL_HYPOTHESIS] if NULL_HYPOTHESIS in difference_df.columns else None,
+                    x
+                    for x in [
+                        difference_df[LOWER],
+                        difference_df[DIFFERENCE],
+                        difference_df[UPPER],
+                        difference_df[NULL_HYPOTHESIS] if NULL_HYPOTHESIS in difference_df.columns else None,
+                    ]
+                    if x is not None
                 ],
             ),
             absolute=absolute,
@@ -327,8 +331,10 @@ def _categorical_difference_chart(
 
         return chart_grid
 
-    def _summary_plot(self, level_name: Union[str, Tuple], level_df: DataFrame, groupby: Union[str, Iterable]):
-        remaining_groups = get_remaning_groups(self._all_group_columns, groupby)
+    def _summary_plot(
+        self, level_name: Optional[Hashable], level_df: DataFrame, groupby: Optional[Union[str, Iterable]]
+    ):
+        remaining_groups = get_remaining_groups(self._all_group_columns, groupby)
         if self._ordinal_group_column is not None and self._ordinal_group_column in remaining_groups:
             ch = self._ordinal_summary_plot(level_name, level_df, remaining_groups, groupby)
         else:
@@ -337,12 +343,12 @@ def _summary_plot(
 
     def _ordinal_summary_plot(
         self,
-        level_name: Union[str, Tuple],
+        level_name: Optional[Hashable],
         level_df: DataFrame,
         remaining_groups: Union[str, Iterable],
-        groupby: Union[str, Iterable],
+        groupby: Optional[Union[str, Iterable]],
     ):
-        remaining_groups = get_remaning_groups(remaining_groups, self._ordinal_group_column)
+        remaining_groups = get_remaining_groups(remaining_groups, self._ordinal_group_column)
         title = "Estimate of {} / {}".format(self._numerator, self._denominator)
         y_axis_label = "{} / {}".format(self._numerator, self._denominator)
         return self._ordinal_plot(
@@ -361,8 +367,8 @@ def _ordinal_plot(
         self,
         center_name: str,
         level_df: DataFrame,
-        groupby: Union[str, Iterable],
-        level_name: Union[str, Tuple],
+        groupby: Optional[Union[str, Iterable]],
+        level_name: Optional[Hashable],
         remaining_groups: Union[str, Iterable],
         absolute: bool,
         title: str,
@@ -374,7 +380,16 @@ def _ordinal_plot(
         colors = "color" if remaining_groups else None
         axis_format, y_min, y_max = axis_format_precision(
             numbers=concat(
-                [df[LOWER], df[center_name], df[UPPER], df[NULL_HYPOTHESIS] if NULL_HYPOTHESIS in df.columns else None]
+                [
+                    x
+                    for x in [
+                        df[LOWER],
+                        df[center_name],
+                        df[UPPER],
+                        df[NULL_HYPOTHESIS] if NULL_HYPOTHESIS in df.columns else None,
+                    ]
+                    if x is not None
+                ]
             ),
             absolute=absolute,
         )
@@ -493,7 +508,7 @@ def _ordinal_multiple_difference_plot(
         level_as_reference: bool,
         use_adjusted_intervals: bool,
     ):
-        remaining_groups = get_remaning_groups(groupby, self._ordinal_group_column)
+        remaining_groups = get_remaining_groups(groupby, self._ordinal_group_column)
         groupby_columns = self._add_level_column(remaining_groups, level_as_reference)
         title = self._get_multiple_difference_title(difference_df, level_as_reference)
         y_axis_label = self._get_difference_plot_label(absolute)
@@ -592,14 +607,32 @@ def add_tools(
         chart.figure.legend.click_policy = "hide"
         axis_format, y_min, y_max = axis_format_precision(
             numbers=concat(
-                [df[LOWER], df[center_name], df[UPPER], df[NULL_HYPOTHESIS] if NULL_HYPOTHESIS in df.columns else None]
+                [
+                    x
+                    for x in [
+                        df[LOWER],
+                        df[center_name],
+                        df[UPPER],
+                        df[NULL_HYPOTHESIS] if NULL_HYPOTHESIS in df.columns else None,
+                    ]
+                    if x is not None
+                ]
             ),
             absolute=absolute,
             extra_zeros=2,
         )
         axis_format_reference_level, _, _ = axis_format_precision(
             numbers=concat(
-                [df[LOWER], df[center_name], df[UPPER], df[NULL_HYPOTHESIS] if NULL_HYPOTHESIS in df.columns else None]
+                [
+                    x
+                    for x in [
+                        df[LOWER],
+                        df[center_name],
+                        df[UPPER],
+                        df[NULL_HYPOTHESIS] if NULL_HYPOTHESIS in df.columns else None,
+                    ]
+                    if x is not None
+                ]
             ),
             absolute=True,
             extra_zeros=2,
diff --git a/spotify_confidence/analysis/frequentist/chi_squared.py b/spotify_confidence/analysis/frequentist/chi_squared.py
index aaaf3e8..17720fd 100644
--- a/spotify_confidence/analysis/frequentist/chi_squared.py
+++ b/spotify_confidence/analysis/frequentist/chi_squared.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Iterable, Union
+from typing import Iterable, Optional, Union
 
 from pandas import DataFrame
 
@@ -32,8 +32,8 @@ def __init__(
         ordinal_group_column: Union[str, None] = None,
         interval_size: float = 0.95,
         correction_method: str = BONFERRONI,
-        confidence_computer: ConfidenceComputerABC = None,
-        confidence_grapher: ConfidenceGrapherABC = None,
+        confidence_computer: Optional[ConfidenceComputerABC] = None,
+        confidence_grapher: Optional[ConfidenceGrapherABC] = None,
         metric_column: Union[str, None] = None,
         treatment_column: Union[str, None] = None,
     ):
diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/bootstrap_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/bootstrap_computer.py
index 5d8240f..b5a12e1 100644
--- a/spotify_confidence/analysis/frequentist/confidence_computers/bootstrap_computer.py
+++ b/spotify_confidence/analysis/frequentist/confidence_computers/bootstrap_computer.py
@@ -1,4 +1,4 @@
-from typing import Dict, Tuple
+from typing import Any, Optional, Tuple
 
 import numpy as np
 from pandas import DataFrame, Series
@@ -6,12 +6,12 @@
 from spotify_confidence.analysis.constants import BOOTSTRAPS, CI_LOWER, CI_UPPER, INTERVAL_SIZE, SFX1, SFX2
 
 
-def point_estimate(df: DataFrame, **kwargs: Dict[str, str]) -> float:
+def point_estimate(df: DataFrame, **kwargs: Any) -> float:
     bootstrap_samples = kwargs[BOOTSTRAPS]
     return df[bootstrap_samples].map(lambda a: a.mean())
 
 
-def variance(df: Series, **kwargs: Dict[str, str]) -> float:
+def variance(df: Series, **kwargs: Any) -> float:
     bootstrap_samples = kwargs[BOOTSTRAPS]
     variance = df[bootstrap_samples].map(lambda a: a.var())
@@ -20,11 +20,11 @@ def variance(df: Series, **kwargs: Dict[str, str]) -> float:
     return variance
 
 
-def std_err(row: Series, **kwargs: Dict[str, str]) -> float:
+def std_err(row: Series, **kwargs: Any) -> Optional[float]:
     return None
 
 
-def add_point_estimate_ci(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
+def add_point_estimate_ci(df: DataFrame, **kwargs: Any) -> DataFrame:
     bootstrap_samples = kwargs[BOOTSTRAPS]
     interval_size = kwargs[INTERVAL_SIZE]
     df[CI_LOWER] = df[bootstrap_samples].map(lambda a: np.percentile(a, 100 * (1 - interval_size) / 2))
@@ -32,11 +32,11 @@ def add_point_estimate_ci(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
     return df
 
 
-def p_value(row, **kwargs: Dict[str, str]) -> float:
+def p_value(row, **kwargs: Any) -> float:
     return -1
 
 
-def ci(df, alpha_column: str, **kwargs: Dict[str, str]) -> Tuple[Series, Series]:
+def ci(df, alpha_column: str, **kwargs: Any) -> Tuple[Series, Series]:
     bootstrap_samples = kwargs[BOOTSTRAPS]
     lower = df.apply(
         lambda row: np.percentile(
@@ -53,5 +53,5 @@ def ci(df, alpha_column: str, **kwargs: Dict[str, str]) -> Tuple[Series, Series]
     return lower, upper
 
 
-def achieved_power(df: DataFrame, mde: float, alpha: float) -> DataFrame:
+def achieved_power(df: DataFrame, mde: float, alpha: float) -> Optional[DataFrame]:
     return None
diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/chi_squared_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/chi_squared_computer.py
index d6e2c7b..3c44a0c 100644
--- a/spotify_confidence/analysis/frequentist/confidence_computers/chi_squared_computer.py
+++ b/spotify_confidence/analysis/frequentist/confidence_computers/chi_squared_computer.py
@@ -1,4 +1,4 @@
-from typing import Dict, Tuple
+from typing import Any, Tuple, Union
 
 import numpy as np
 from pandas import DataFrame, Series
@@ -18,7 +18,7 @@
 )
 
 
-def point_estimate(df: DataFrame, **kwargs: Dict[str, str]) -> float:
+def point_estimate(df: DataFrame, **kwargs: Any) -> float:
     numerator = kwargs[NUMERATOR]
     denominator = kwargs[DENOMINATOR]
     if (df[denominator] == 0).any():
@@ -26,19 +26,19 @@ def point_estimate(df: DataFrame, **kwargs: Dict[str, str]) -> float:
     return df[numerator] / df[denominator]
 
 
-def variance(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
+def variance(df: DataFrame, **kwargs: Any) -> Series:
     variance = df[POINT_ESTIMATE] * (1 - df[POINT_ESTIMATE])
     if (variance < 0).any():
         raise ValueError(f"Computed variance is negative: {variance}. Please check your inputs.")
     return variance
 
 
-def std_err(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
+def std_err(df: DataFrame, **kwargs: Any) -> Series:
     denominator = kwargs[DENOMINATOR]
     return np.sqrt(df[VARIANCE + SFX1] / df[denominator + SFX1] + df[VARIANCE + SFX2] / df[denominator + SFX2])
 
 
-def add_point_estimate_ci(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
+def add_point_estimate_ci(df: DataFrame, **kwargs: Any) -> DataFrame:
     numerator = kwargs[NUMERATOR]
     denominator = kwargs[DENOMINATOR]
     interval_size = kwargs[INTERVAL_SIZE]
@@ -50,7 +50,7 @@ def add_point_estimate_ci(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
     return df
 
 
-def p_value(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
+def p_value(df: DataFrame, **kwargs: Any) -> Series:
     n1, n2 = kwargs[NUMERATOR] + SFX1, kwargs[NUMERATOR] + SFX2
     d1, d2 = kwargs[DENOMINATOR] + SFX1, kwargs[DENOMINATOR] + SFX2
@@ -64,7 +64,7 @@ def p_value_row(row):
     return df.apply(p_value_row, axis=1)
 
 
-def ci(df: DataFrame, alpha_column: str, **kwargs: Dict[str, str]) -> Tuple[Series, Series]:
+def ci(df: DataFrame, alpha_column: str, **kwargs: Any) -> Tuple[Series, Series]:
     n1, n2 = kwargs[NUMERATOR] + SFX1, kwargs[NUMERATOR] + SFX2
     d1, d2 = kwargs[DENOMINATOR] + SFX1, kwargs[DENOMINATOR] + SFX2
     return confint_proportions_2indep(
@@ -78,7 +78,7 @@ def ci(df: DataFrame, alpha_column: str, **kwargs: Dict[str, str]) -> Tuple[Seri
     )
 
 
-def achieved_power(df: DataFrame, mde: float, alpha: float, **kwargs: Dict[str, str]) -> DataFrame:
+def achieved_power(df: DataFrame, mde: float, alpha: float, **kwargs: Any) -> Union[int, float]:
     n1, n2 = kwargs[NUMERATOR] + SFX1, kwargs[NUMERATOR] + SFX2
     d1, d2 = kwargs[DENOMINATOR] + SFX1, kwargs[DENOMINATOR] + SFX2
diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py
index fc355ba..ee596aa 100644
--- a/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py
+++ b/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Iterable, List, Tuple, Union
+from typing import Any, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 from numpy import isnan
@@ -24,7 +24,7 @@
     drop_and_rename_columns,
     get_all_categorical_group_columns,
     get_all_group_columns,
-    get_remaning_groups,
+    get_remaining_groups,
     groupbyApplyParallel,
     level2str,
     listify,
@@ -261,13 +261,13 @@ def compute_difference(
         level_1: Union[str, Iterable],
         level_2: Union[str, Iterable],
         absolute: bool,
-        groupby: Union[str, Iterable],
-        nims: NIM_TYPE,
-        final_expected_sample_size_column: str,
+        groupby: Optional[Union[str, Iterable]],
+        nims: Optional[NIM_TYPE],
+        final_expected_sample_size_column: Optional[str],
         verbose: bool,
-        mde_column: str,
+        mde_column: Optional[str],
     ) -> DataFrame:
-        level_columns = get_remaning_groups(self._all_group_columns, groupby)
+        level_columns = get_remaining_groups(self._all_group_columns, groupby)
         difference_df = self._compute_differences(
             level_columns=level_columns,
             levels=[(level_1, level_2)],
@@ -291,16 +291,16 @@ def compute_difference(
     def compute_multiple_difference(
         self,
-        level: Union[str, Iterable],
+        level: Union[str, Iterable, int],
         absolute: bool,
-        groupby: Union[str, Iterable],
-        level_as_reference: bool,
-        nims: NIM_TYPE,
-        final_expected_sample_size_column: str,
+        groupby: Optional[Union[str, Iterable]],
+        level_as_reference: Optional[bool],
+        nims: Optional[NIM_TYPE],
+        final_expected_sample_size_column: Optional[str],
         verbose: bool,
-        mde_column: str,
+        mde_column: Optional[str],
     ) -> DataFrame:
-        level_columns = get_remaning_groups(self._all_group_columns, groupby)
+        level_columns = get_remaining_groups(self._all_group_columns, groupby)
         other_levels = [
             other
             for other in self._sufficient_statistics.groupby(level_columns, sort=False).groups.keys()
@@ -340,15 +340,15 @@ def compute_multiple_difference(
     def compute_differences(
         self,
-        levels: List[Tuple],
+        levels: Union[Tuple, List[Tuple]],
         absolute: bool,
-        groupby: Union[str, Iterable],
-        nims: NIM_TYPE,
-        final_expected_sample_size_column: str,
+        groupby: Optional[Union[str, Iterable]],
+        nims: Optional[NIM_TYPE],
+        final_expected_sample_size_column: Optional[str],
         verbose: bool,
-        mde_column: str,
+        mde_column: Optional[str],
     ) -> DataFrame:
-        level_columns = get_remaning_groups(self._all_group_columns, groupby)
+        level_columns = get_remaining_groups(self._all_group_columns, groupby)
         difference_df = self._compute_differences(
             level_columns=level_columns,
             levels=[levels] if isinstance(levels, tuple) else levels,
@@ -375,11 +375,11 @@ def _compute_differences(
         level_columns: Iterable,
         levels: Union[str, Iterable],
         absolute: bool,
-        groupby: Union[str, Iterable],
-        level_as_reference: bool,
-        nims: NIM_TYPE,
-        final_expected_sample_size_column: str,
-        mde_column: str,
+        groupby: Optional[Union[str, Iterable]],
+        level_as_reference: Optional[bool],
+        nims: Optional[NIM_TYPE],
+        final_expected_sample_size_column: Optional[str],
+        mde_column: Optional[str],
     ):
         if type(level_as_reference) is not bool:
             raise ValueError(f"level_as_reference must be either True or False, but is {level_as_reference}.")
@@ -534,9 +534,16 @@ def join(df: DataFrame) -> DataFrame:
 
         return comparison_df
 
-    def achieved_power(self, level_1, level_2, mde, alpha, groupby):
+    def achieved_power(
+        self,
+        level_1: Union[str, Iterable],
+        level_2: Union[str, Iterable],
+        mde: float,
+        alpha: float,
+        groupby: Optional[Union[str, Iterable]],
+    ) -> DataFrame:
         groupby = listify(groupby)
-        level_columns = get_remaning_groups(self._all_group_columns, groupby)
+        level_columns = get_remaining_groups(self._all_group_columns, groupby)
         kwargs = {NUMERATOR: self._numerator, DENOMINATOR: self._denominator}
         return (
             self._compute_differences(
@@ -561,7 +568,7 @@ def achieved_power(self, level_1, level_2, mde, alpha, groupby):
         )[["level_1", "level_2", "achieved_power"]]
 
 
-def _compute_comparisons(df: DataFrame, **kwargs: Dict) -> DataFrame:
+def _compute_comparisons(df: DataFrame, **kwargs: Any) -> DataFrame:
     return (
         df.assign(**{DIFFERENCE: lambda df: df[POINT_ESTIMATE + SFX2] - df[POINT_ESTIMATE + SFX1]})
         .assign(**{STD_ERR: confidence_computers[df[kwargs[METHOD]].values[0]].std_err(df, **kwargs)})
@@ -572,7 +579,7 @@ def _compute_comparisons(df: DataFrame, **kwargs: Dict) -> DataFrame:
     )
 
 
-def _add_variance_reduction_rate(df: DataFrame, **kwargs: Dict) -> DataFrame:
+def _add_variance_reduction_rate(df: DataFrame, **kwargs: Any) -> DataFrame:
     denominator = kwargs[DENOMINATOR]
     method_column = kwargs[METHOD]
     if (df[method_column] == ZTESTLINREG).any():
@@ -587,13 +594,13 @@ def _add_variance_reduction_rate(df: DataFrame, **kwargs: Dict) -> DataFrame:
     return df
 
 
-def _add_p_value(df: DataFrame, **kwargs: Dict) -> DataFrame:
+def _add_p_value(df: DataFrame, **kwargs: Any) -> DataFrame:
     return df.pipe(set_alpha_and_adjust_preference, **kwargs).assign(
         **{P_VALUE: lambda df: df.pipe(_p_value, **kwargs)}
     )
 
 
-def _add_ci_and_adjust_if_absolute(df: DataFrame, **kwargs: Dict) -> DataFrame:
+def _add_ci_and_adjust_if_absolute(df: DataFrame, **kwargs: Any) -> DataFrame:
     return df.pipe(add_ci, **kwargs).pipe(_adjust_if_absolute, absolute=kwargs[ABSOLUTE])
@@ -613,13 +620,13 @@ def _adjust_if_absolute(df: DataFrame, absolute: bool) -> DataFrame:
     )
 
 
-def _p_value(df: DataFrame, **kwargs: Dict) -> float:
+def _p_value(df: DataFrame, **kwargs: Any) -> float:
     if df[kwargs[METHOD]].values[0] == CHI2 and (df[NIM].notna()).any():
         raise ValueError("Non-inferiority margins not supported in ChiSquared. Use StudentsTTest or ZTest instead.")
     return confidence_computers[df[kwargs[METHOD]].values[0]].p_value(df, **kwargs)
 
 
-def _powered_effect_and_required_sample_size_from_difference_df(df: DataFrame, **kwargs: Dict) -> DataFrame:
+def _powered_effect_and_required_sample_size_from_difference_df(df: DataFrame, **kwargs: Any) -> DataFrame:
     if df[kwargs[METHOD]].values[0] not in [ZTEST, ZTESTLINREG] and kwargs[MDE] in df:
         raise ValueError("Minimum detectable effects only supported for ZTest.")
     elif df[kwargs[METHOD]].values[0] not in [ZTEST, ZTESTLINREG] or (df[ADJUSTED_POWER].isna()).any():
diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/sample_size_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/sample_size_computer.py
index 80b45f9..358f5fc 100644
--- a/spotify_confidence/analysis/frequentist/confidence_computers/sample_size_computer.py
+++ b/spotify_confidence/analysis/frequentist/confidence_computers/sample_size_computer.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Iterable, List, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 from pandas import DataFrame, Series
@@ -81,7 +81,7 @@ def __init__(
         categorical_group_columns: Union[str, Iterable],
         interval_size: float,
         correction_method: str,
-        metric_column: str,
+        metric_column: Optional[str],
         power: float,
         point_estimate_column: str,
         var_column: str,
@@ -124,18 +124,6 @@ def __init__(
 
         self._sufficient = None
 
-    def compute_summary(self, verbose: bool) -> DataFrame:
-        return (
-            self._sufficient_statistics
-            if verbose
-            else self._sufficient_statistics[
-                self._all_group_columns
-                + ([self._metric_column] if self._metric_column is not None and self._single_metric else [])
-                + [c for c in [self._numerator, self._denominator] if c is not None]
-                + [POINT_ESTIMATE, CI_LOWER, CI_UPPER]
-            ]
-        )
-
     @property
     def _sufficient_statistics(self) -> DataFrame:
         if self._sufficient is None:
@@ -158,7 +146,7 @@ def compute_sample_size(
         mde_column: str,
         nim_column: str,
         preferred_direction_column: str,
-        final_expected_sample_size_column: str,
+        final_expected_sample_size_column: Optional[str],
     ) -> DataFrame:
         kwargs, group_columns, sample_size_df = self._initialise_sample_size_and_power_computation(
             final_expected_sample_size_column, mde_column, nim_column, preferred_direction_column, treatment_weights
@@ -242,7 +230,7 @@ def _initialise_sample_size_and_power_computation(
 
     def compute_optimal_weights_and_sample_size(
         self, sample_size_df: DataFrame, number_of_groups: int
-    ) -> Tuple[Iterable, int]:
+    ) -> Tuple[List[float], Optional[float]]:
         sample_size_df = (
             sample_size_df.reset_index(drop=True)
             .assign(**{OPTIMAL_KAPPA: lambda df: df.apply(_optimal_kappa, is_binary_column=self._is_binary, axis=1)})
@@ -433,15 +421,15 @@ def _binary_variance(p: float) -> float:
         return 1.0
 
 
-def _optimal_weights(kappa: float, number_of_groups) -> Iterable:
+def _optimal_weights(kappa: float, number_of_groups: int) -> List[float]:
     treatment_weight = 1 / (kappa + number_of_groups - 1)
     control_weight = kappa * treatment_weight
     return [control_weight] + [treatment_weight for _ in range(number_of_groups - 1)]
 
 
 def _find_optimal_group_weights_across_rows(
-    df: DataFrame, group_count: int, group_columns: Iterable, **kwargs: Dict
-) -> (List[float], int):
+    df: DataFrame, group_count: int, group_columns: Iterable, **kwargs: Any
+) -> Tuple[List[float], Optional[float]]:
     min_kappa = min(df[OPTIMAL_KAPPA])
     max_kappa = max(df[OPTIMAL_KAPPA])
@@ -466,8 +454,8 @@ def _find_optimal_group_weights_across_rows(
 
 
 def _calculate_optimal_sample_size_given_weights(
-    df: DataFrame, optimal_weights: List[float], group_columns: Iterable, **kwargs: Dict
-) -> int:
+    df: DataFrame, optimal_weights: List[float], group_columns: Iterable, **kwargs: Any
+) -> Optional[float]:
     kwargs[TREATMENT_WEIGHTS] = optimal_weights
     sample_size_df = groupbyApplyParallel(
         df.groupby(de_list_if_length_one(group_columns), as_index=False, sort=False),
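As a concrete check on `_optimal_weights` above — `kappa` is the control-to-treatment allocation ratio implied by `control_weight = kappa * treatment_weight` — here is an illustrative session, not part of the patch (the private-function import path is taken from the diff header):

```python
from spotify_confidence.analysis.frequentist.confidence_computers.sample_size_computer import (
    _optimal_weights,
)

# kappa = 1 with three groups: treatment_weight = 1 / (1 + 3 - 1) = 1/3 and
# control_weight = 1 * 1/3, i.e. an even split.
assert _optimal_weights(kappa=1.0, number_of_groups=3) == [1 / 3, 1 / 3, 1 / 3]

# kappa = 2 gives the control group twice the treatment share:
# treatment_weight = 1 / (2 + 3 - 1) = 0.25, control_weight = 0.5.
assert _optimal_weights(kappa=2.0, number_of_groups=3) == [0.5, 0.25, 0.25]
```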
diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/t_test_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/t_test_computer.py
index 59652fb..d6a6aff 100644
--- a/spotify_confidence/analysis/frequentist/confidence_computers/t_test_computer.py
+++ b/spotify_confidence/analysis/frequentist/confidence_computers/t_test_computer.py
@@ -1,4 +1,4 @@
-from typing import Dict, Tuple
+from typing import Any, Tuple, Union
 
 import numpy as np
 from pandas import DataFrame, Series
@@ -24,7 +24,7 @@
 )
 
 
-def point_estimate(df: DataFrame, **kwargs: Dict[str, str]) -> float:
+def point_estimate(df: DataFrame, **kwargs: Any) -> float:
     numerator = kwargs[NUMERATOR]
     denominator = kwargs[DENOMINATOR]
     if (df[denominator] == 0).any():
@@ -32,7 +32,7 @@ def point_estimate(df: DataFrame, **kwargs: Dict[str, str]) -> float:
     return df[numerator] / df[denominator]
 
 
-def variance(df: DataFrame, **kwargs: Dict[str, str]) -> float:
+def variance(df: DataFrame, **kwargs: Any) -> float:
     numerator = kwargs[NUMERATOR]
     denominator = kwargs[DENOMINATOR]
     numerator_sumsq = kwargs[NUMERATOR_SUM_OF_SQUARES]
@@ -48,12 +48,12 @@ def variance(df: DataFrame, **kwargs: Dict[str, str]) -> float:
     return variance
 
 
-def std_err(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
+def std_err(df: DataFrame, **kwargs: Any) -> Series:
     denominator = kwargs[DENOMINATOR]
     return np.sqrt(df[VARIANCE + SFX1] / df[denominator + SFX1] + df[VARIANCE + SFX2] / df[denominator + SFX2])
 
 
-def add_point_estimate_ci(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
+def add_point_estimate_ci(df: DataFrame, **kwargs: Any) -> DataFrame:
     denominator = kwargs[DENOMINATOR]
     interval_size = kwargs[INTERVAL_SIZE]
     df[CI_LOWER], df[CI_UPPER] = _tconfint_generic(
@@ -66,14 +66,14 @@ def add_point_estimate_ci(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
     return df
 
 
-def _dof(row: Series, **kwargs: Dict[str, str]) -> float:
+def _dof(df: DataFrame, **kwargs: Any) -> float:
     denominator = kwargs[DENOMINATOR]
-    v1, v2 = row[VARIANCE + SFX1], row[VARIANCE + SFX2]
-    n1, n2 = row[denominator + SFX1], row[denominator + SFX2]
+    v1, v2 = df[VARIANCE + SFX1], df[VARIANCE + SFX2]
+    n1, n2 = df[denominator + SFX1], df[denominator + SFX2]
     return (v1 / n1 + v2 / n2) ** 2 / ((v1 / n1) ** 2 / (n1 - 1) + (v2 / n2) ** 2 / (n2 - 1))
 
 
-def p_value(df: Series, **kwargs: Dict[str, str]) -> Series:
+def p_value(df: DataFrame, **kwargs: Any) -> Series:
     _, p_value = _tstat_generic(
         value1=df[POINT_ESTIMATE + SFX2],
         value2=df[POINT_ESTIMATE + SFX1],
@@ -85,7 +85,7 @@ def p_value(df: Series, **kwargs: Dict[str, str]) -> Series:
     return p_value
 
 
-def ci(df: DataFrame, alpha_column: str, **kwargs: Dict[str, str]) -> Tuple[Series, Series]:
+def ci(df: DataFrame, alpha_column: str, **kwargs: Any) -> Tuple[Series, Series]:
     return _tconfint_generic(
         mean=df[DIFFERENCE],
         std_mean=df[STD_ERR],
@@ -95,7 +95,7 @@ def ci(df: DataFrame, alpha_column: str, **kwargs: Dict[str, str]) -> Tuple[Seri
     )
 
 
-def achieved_power(df: DataFrame, mde: float, alpha: float, **kwargs: Dict[str, str]) -> DataFrame:
+def achieved_power(df: DataFrame, mde: float, alpha: float, **kwargs: Any) -> Union[int, float]:
     v1, v2 = df[VARIANCE + SFX1], df[VARIANCE + SFX2]
     d1, d2 = kwargs[DENOMINATOR] + SFX1, kwargs[DENOMINATOR] + SFX2
     n1, n2 = df[d1], df[d2]
diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/z_test_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/z_test_computer.py
index 0f5effd..a8b29cf 100644
--- a/spotify_confidence/analysis/frequentist/confidence_computers/z_test_computer.py
+++ b/spotify_confidence/analysis/frequentist/confidence_computers/z_test_computer.py
@@ -1,4 +1,4 @@
-from typing import Dict, Tuple, Union
+from typing import Any, Optional, Tuple, Union
 
 import numpy as np
 from pandas import DataFrame, Series
@@ -46,14 +46,20 @@
     TWO_SIDED,
     VARIANCE,
 )
-from spotify_confidence.analysis.frequentist.sequential_bound_solver import bounds
+from spotify_confidence.analysis.frequentist.sequential_bound_solver import (
+    CalculationResult,
+    ComputationState,
+    bounds,
+)
 
 
-def sequential_bounds(t: np.array, alpha: float, sides: int, state: DataFrame = None):
+def sequential_bounds(
+    t: np.ndarray, alpha: float, sides: int, state: Optional[ComputationState] = None
+) -> CalculationResult:
     return bounds(t, alpha, rho=2, ztrun=8, sides=sides, max_nints=1000, state=state)
 
 
-def point_estimate(df: DataFrame, **kwargs: Dict[str, str]) -> float:
+def point_estimate(df: DataFrame, **kwargs: Any) -> float:
     numerator = kwargs[NUMERATOR]
     denominator = kwargs[DENOMINATOR]
     if (df[denominator] == 0).any():
@@ -61,7 +67,7 @@ def point_estimate(df: DataFrame, **kwargs: Dict[str, str]) -> float:
     return df[numerator] / df[denominator]
 
 
-def variance(df: DataFrame, **kwargs: Dict[str, str]) -> float:
+def variance(df: DataFrame, **kwargs: Any) -> float:
     numerator = kwargs[NUMERATOR]
     denominator = kwargs[DENOMINATOR]
     numerator_sumsq = kwargs[NUMERATOR_SUM_OF_SQUARES]
@@ -77,12 +83,12 @@ def variance(df: DataFrame, **kwargs: Dict[str, str]) -> float:
     return variance
 
 
-def std_err(df: Series, **kwargs: Dict[str, str]) -> float:
+def std_err(df: DataFrame, **kwargs: Any) -> float:
     denominator = kwargs[DENOMINATOR]
     return np.sqrt(df[VARIANCE + SFX1] / df[denominator + SFX1] + df[VARIANCE + SFX2] / df[denominator + SFX2])
 
 
-def add_point_estimate_ci(df: Series, **kwargs: Dict[str, str]) -> Series:
+def add_point_estimate_ci(df: DataFrame, **kwargs: Any) -> DataFrame:
     denominator = kwargs[DENOMINATOR]
     interval_size = kwargs[INTERVAL_SIZE]
     df[CI_LOWER], df[CI_UPPER] = _zconfint_generic(
@@ -94,7 +100,7 @@ def add_point_estimate_ci(df: Series, **kwargs: Dict[str, str]) -> Series:
     return df
 
 
-def p_value(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
+def p_value(df: DataFrame, **kwargs: Any) -> Series:
     _, p_value = _zstat_generic(
         value1=df[POINT_ESTIMATE + SFX2],
         value2=df[POINT_ESTIMATE + SFX1],
@@ -105,13 +111,13 @@ def p_value(df: DataFrame, **kwargs: Dict[str, str]) -> Series:
     return p_value
 
 
-def ci(df: DataFrame, alpha_column: str, **kwargs: Dict[str, str]) -> Tuple[Series, Series]:
+def ci(df: DataFrame, alpha_column: str, **kwargs: Any) -> Tuple[Series, Series]:
     return _zconfint_generic(
         mean=df[DIFFERENCE], std_mean=df[STD_ERR], alpha=df[alpha_column], alternative=df[PREFERENCE_TEST].values[0]
     )
 
 
-def achieved_power(df: DataFrame, mde: float, alpha: float, **kwargs: Dict[str, str]) -> DataFrame:
+def achieved_power(df: DataFrame, mde: float, alpha: float, **kwargs: Any) -> Union[int, float]:
     denominator = kwargs[DENOMINATOR]
     v1, v2 = df[VARIANCE + SFX1], df[VARIANCE + SFX2]
     n1, n2 = df[denominator + SFX1], df[denominator + SFX2]
@@ -121,7 +127,7 @@ def achieved_power(df: DataFrame, mde: float, alpha: float, **kwargs: Dict[str,
     return power_calculation(mde, var_pooled, alpha, n1, n2)
 
 
-def compute_sequential_adjusted_alpha(df: DataFrame, **kwargs: Dict[str, str]):
+def compute_sequential_adjusted_alpha(df: DataFrame, **kwargs: Any):
     denominator = kwargs[DENOMINATOR]
     final_expected_sample_size_column = kwargs[FINAL_EXPECTED_SAMPLE_SIZE]
     ordinal_group_column = kwargs[ORDINAL_GROUP_COLUMN]
@@ -204,7 +210,7 @@ def _bw(W: float, alpha: float, m_scal: float, r: int):
             f"{SPOT_1_HOLM}, {SPOT_1_HOMMEL} and {SPOT_1_SIMES_HOCHBERG}"
         )
 
-    def _compute_ci_for_row(row: Series) -> Tuple[float, float]:
+    def _compute_ci_for_row(row: Series) -> Series:
         if row[IS_SIGNIFICANT] and num_significant == m_scal:
             alpha_adj = adjusted_alpha_rej_equal_m
         elif row[IS_SIGNIFICANT] and num_significant < m_scal:
@@ -290,10 +296,10 @@ def required_sample_size(
     hypothetical_effect: Union[Series, float],
     control_avg: Union[Series, float],
     control_var: Union[Series, float],
-    z_alpha: float = None,
-    kappa: float = None,
-    proportion_of_total: Union[Series, float] = None,
-    z_power: float = None,
+    z_alpha: Optional[float] = None,
+    kappa: Optional[float] = None,
+    proportion_of_total: Optional[Union[Series, float]] = None,
+    z_power: Optional[float] = None,
 ) -> Union[Series, float]:
     if kappa is None:
         raise ValueError("kappa is None, must be postive float")
@@ -323,8 +329,8 @@ def _search_MDE_binary_local_search(
     kappa: float,
     proportion_of_total: float,
    current_number_of_units: float,
-    z_alpha: float = None,
-    z_power: float = None,
+    z_alpha: Optional[float] = None,
+    z_power: Optional[float] = None,
 ):
     def f(x):
         return _find_current_powered_effect(
@@ -409,8 +415,8 @@ def _search_MDE_binary(
     kappa: float,
     proportion_of_total: float,
     current_number_of_units: float,
-    z_alpha: float = None,
-    z_power: float = None,
+    z_alpha: Optional[float] = None,
+    z_power: Optional[float] = None,
     return_cost_val=False,
 ):
     candidate_effects = np.linspace(10e-9, 1 - control_avg, num=2000)
@@ -448,13 +454,15 @@ def _search_MDE_binary(
 
 
 def _treatment_group_sample_size(
-    z_alpha: float,
-    z_power: float,
-    hypothetical_effect: float,
-    control_var: float,
-    treatment_var: float,
+    z_alpha: Optional[Union[int, float]],
+    z_power: Optional[Union[int, float]],
+    hypothetical_effect: Union[Series, int, float],
+    control_var: Union[Series, int, float],
+    treatment_var: Union[Series, int, float, np.ndarray],
     kappa: float,
-) -> float:
+) -> Union[float, np.ndarray]:
+    if z_alpha is None or z_power is None:
+        raise ValueError("z_alpha and z_power must not be None")
     return np.ceil(np.power((z_alpha + z_power) / abs(hypothetical_effect), 2) * (control_var / kappa + treatment_var))
@@ -467,8 +475,8 @@ def _find_current_powered_effect(
     kappa: float,
     proportion_of_total: float,
     current_number_of_units: float,
-    z_power: float = None,
-    z_alpha: float = None,
+    z_power: Optional[float] = None,
+    z_alpha: Optional[float] = None,
 ) -> float:
     treatment_var = _get_hypothetical_treatment_var(
         binary_metric=binary,
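The `None` guard added to `_treatment_group_sample_size` above makes the implicit `z_alpha`/`z_power` contract explicit; the closing `np.ceil(...)` line is the standard two-sample size formula, roughly `n_treatment = ceil(((z_alpha + z_power) / |effect|)^2 * (control_var / kappa + treatment_var))`. A back-of-the-envelope run with made-up numbers, mirroring that expression rather than calling the private function:

```python
import numpy as np

# Assumed inputs: two-sided alpha = 0.05 (z_alpha ≈ 1.96), 80% power
# (z_power ≈ 0.8416), equal allocation (kappa = 1), a binary metric at
# p = 0.5 in both groups (variance 0.25), and a 1pp hypothetical effect.
z_alpha, z_power, kappa = 1.96, 0.8416, 1.0
hypothetical_effect, control_var, treatment_var = 0.01, 0.25, 0.25

n_treatment = np.ceil(
    np.power((z_alpha + z_power) / abs(hypothetical_effect), 2) * (control_var / kappa + treatment_var)
)
assert n_treatment == 39245  # ≈ 39k units needed in the treatment group
```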
diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/z_test_linreg_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/z_test_linreg_computer.py
index 9935c11..4cbbe95 100644
--- a/spotify_confidence/analysis/frequentist/confidence_computers/z_test_linreg_computer.py
+++ b/spotify_confidence/analysis/frequentist/confidence_computers/z_test_linreg_computer.py
@@ -1,5 +1,5 @@
 from functools import reduce
-from typing import Dict, Union
+from typing import Any, Optional, Tuple, Union
 
 import numpy as np
 from pandas import DataFrame, Series
@@ -16,7 +16,7 @@
 from spotify_confidence.analysis.frequentist.confidence_computers import z_test_computer
 
 
-def estimate_slope(df, **kwargs: Dict) -> DataFrame:
+def estimate_slope(df, **kwargs: Any) -> DataFrame:
     if kwargs[FEATURE] not in df:
         return df
@@ -51,14 +51,14 @@ def dimension(x):
     return df
 
 
-def point_estimate(df: Series, **kwargs) -> float:
+def point_estimate(df: DataFrame, **kwargs) -> float:
     df = estimate_slope(df, **kwargs)
     point_estimate = df[kwargs[NUMERATOR]] / df[kwargs[DENOMINATOR]]
     if REGRESSION_PARAM in df:
         feature_mean = df[kwargs[FEATURE]].sum() / df[kwargs[DENOMINATOR]].sum()
 
-        def lin_reg_point_estimate_delta(row: Series, feature_mean: float, **kwargs: Dict) -> Series:
+        def lin_reg_point_estimate_delta(row: Series, feature_mean: float, **kwargs: Any) -> Series:
             return dfmatmul(
                 row[REGRESSION_PARAM], row[kwargs[FEATURE]] - feature_mean * row[kwargs[DENOMINATOR]], outer=False
             )
@@ -89,7 +89,7 @@ def lin_reg_variance_delta(row, **kwargs):
     return variance2 + variance3
 
 
-def variance(df: DataFrame, **kwargs) -> Series:
+def variance(df: DataFrame, **kwargs) -> Union[float, Series]:
     variance1 = z_test_computer.variance(df, **kwargs)
     if kwargs[FEATURE] in df:
         computed_variances = variance1 + df.apply(lin_reg_variance_delta, axis=1, **kwargs)
@@ -100,19 +100,19 @@ def variance(df: DataFrame, **kwargs) -> Series:
     return variance1
 
 
-def add_point_estimate_ci(df: DataFrame, **kwargs: Dict) -> DataFrame:
+def add_point_estimate_ci(df: DataFrame, **kwargs: Any) -> DataFrame:
     return z_test_computer.add_point_estimate_ci(df, **kwargs)
 
 
-def std_err(df: DataFrame, **kwargs: Dict) -> DataFrame:
+def std_err(df: DataFrame, **kwargs: Any) -> float:
     return z_test_computer.std_err(df, **kwargs)
 
 
-def p_value(df: DataFrame, **kwargs: Dict) -> DataFrame:
+def p_value(df: DataFrame, **kwargs: Any) -> Series:
     return z_test_computer.p_value(df, **kwargs)
 
 
-def ci(df: DataFrame, alpha_column: str, **kwargs: Dict) -> DataFrame:
+def ci(df: DataFrame, alpha_column: str, **kwargs: Any) -> Tuple[Series, Series]:
     return z_test_computer.ci(df, alpha_column, **kwargs)
@@ -134,10 +134,10 @@ def required_sample_size(
     hypothetical_effect: Union[Series, float],
     control_avg: Union[Series, float],
     control_var: Union[Series, float],
-    z_alpha: float = None,
-    kappa: float = None,
-    proportion_of_total: Union[Series, float] = None,
-    z_power: float = None,
+    z_alpha: Optional[float] = None,
+    kappa: Optional[float] = None,
+    proportion_of_total: Optional[Union[Series, float]] = None,
+    z_power: Optional[float] = None,
 ) -> Union[Series, float]:
     return z_test_computer.required_sample_size(
         binary,
diff --git a/spotify_confidence/analysis/frequentist/experiment.py b/spotify_confidence/analysis/frequentist/experiment.py
index 7a78ef1..f0f1422 100644
--- a/spotify_confidence/analysis/frequentist/experiment.py
+++ b/spotify_confidence/analysis/frequentist/experiment.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Iterable, List, Tuple, Union
+from typing import Dict, Iterable, List, Optional, Tuple, Union
 
 from pandas import DataFrame
@@ -68,23 +68,23 @@ class Experiment(ConfidenceABC):
     def __init__(
         self,
         data_frame: DataFrame,
-        numerator_column: str,
+        numerator_column: Optional[str],
         numerator_sum_squares_column: Union[str, None],
-        denominator_column: str,
+        denominator_column: Optional[str],
         categorical_group_columns: Union[str, Iterable],
         ordinal_group_column: Union[str, None] = None,
         interval_size: float = 0.95,
         correction_method: str = BONFERRONI,
-        confidence_computer: ConfidenceComputerABC = None,
-        confidence_grapher: ConfidenceGrapherABC = None,
-        method_column: str = None,
-        bootstrap_samples_column: str = None,
+        confidence_computer: Optional[ConfidenceComputerABC] = None,
+        confidence_grapher: Optional[ConfidenceGrapherABC] = None,
+        method_column: Optional[str] = None,
+        bootstrap_samples_column: Optional[str] = None,
         metric_column=None,
         treatment_column=None,
         power: float = 0.8,
-        feature_column: str = None,
-        feature_sum_squares_column: str = None,
-        feature_cross_sum_column: str = None,
+        feature_column: Optional[str] = None,
+        feature_sum_squares_column: Optional[str] = None,
+        feature_cross_sum_column: Optional[str] = None,
     ):
         validate_categorical_columns(categorical_group_columns)
         self._df = data_frame
@@ -145,11 +145,11 @@ def difference(
         level_1: Union[str, Tuple],
         level_2: Union[str, Tuple],
         absolute: bool = True,
-        groupby: Union[str, Iterable] = None,
-        non_inferiority_margins: NIM_TYPE = None,
-        final_expected_sample_size_column: str = None,
+        groupby: Optional[Union[str, Iterable]] = None,
+        non_inferiority_margins: Optional[NIM_TYPE] = None,
+        final_expected_sample_size_column: Optional[str] = None,
         verbose: bool = False,
-        minimum_detectable_effects_column: str = None,
+        minimum_detectable_effects_column: Optional[str] = None,
     ) -> DataFrame:
         self._validate_sequential(final_expected_sample_size_column, groupby)
@@ -168,11 +168,11 @@ def differences(
         self,
         levels: Union[Tuple, List[Tuple]],
         absolute: bool = True,
-        groupby: Union[str, Iterable] = None,
-        non_inferiority_margins: NIM_TYPE = None,
-        final_expected_sample_size_column: str = None,
+        groupby: Optional[Union[str, Iterable]] = None,
+        non_inferiority_margins: Optional[NIM_TYPE] = None,
+        final_expected_sample_size_column: Optional[str] = None,
         verbose: bool = False,
-        minimum_detectable_effects_column: str = None,
+        minimum_detectable_effects_column: Optional[str] = None,
     ) -> DataFrame:
         self._validate_sequential(final_expected_sample_size_column, groupby)
         return self._confidence_computer.compute_differences(
@@ -187,14 +187,14 @@ def differences(
 
     def multiple_difference(
         self,
-        level: Union[str, Tuple],
+        level: Union[str, Tuple, int],
         absolute: bool = True,
-        groupby: Union[str, Iterable] = None,
-        level_as_reference: bool = None,
-        non_inferiority_margins: NIM_TYPE = None,
-        final_expected_sample_size_column: str = None,
+        groupby: Optional[Union[str, Iterable]] = None,
+        level_as_reference: Optional[bool] = None,
+        non_inferiority_margins: Optional[NIM_TYPE] = None,
+        final_expected_sample_size_column: Optional[str] = None,
         verbose: bool = False,
-        minimum_detectable_effects_column: str = None,
+        minimum_detectable_effects_column: Optional[str] = None,
     ) -> DataFrame:
         self._validate_sequential(final_expected_sample_size_column, groupby)
@@ -209,7 +209,7 @@ def multiple_difference(
             minimum_detectable_effects_column,
         )
 
-    def summary_plot(self, groupby: Union[str, Iterable] = None)
-> ChartGrid: + def summary_plot(self, groupby: Optional[Union[str, Iterable]] = None) -> ChartGrid: summary_df = self.summary() graph = self._confidence_grapher.plot_summary(summary_df, groupby) return graph @@ -219,10 +219,10 @@ def difference_plot( level_1: Union[str, Tuple], level_2: Union[str, Tuple], absolute: bool = True, - groupby: Union[str, Iterable] = None, - non_inferiority_margins: NIM_TYPE = None, + groupby: Optional[Union[str, Iterable]] = None, + non_inferiority_margins: Optional[NIM_TYPE] = None, use_adjusted_intervals: bool = False, - final_expected_sample_size_column: str = None, + final_expected_sample_size_column: Optional[str] = None, split_plot_by_groups: bool = False, ) -> ChartGrid: difference_df = self.difference( @@ -243,10 +243,10 @@ def differences_plot( self, levels: List[Tuple], absolute: bool = True, - groupby: Union[str, Iterable] = None, - non_inferiority_margins: NIM_TYPE = None, + groupby: Optional[Union[str, Iterable]] = None, + non_inferiority_margins: Optional[NIM_TYPE] = None, use_adjusted_intervals: bool = False, - final_expected_sample_size_column: str = None, + final_expected_sample_size_column: Optional[str] = None, split_plot_by_groups: bool = False, ) -> ChartGrid: difference_df = self.differences( @@ -259,13 +259,13 @@ def differences_plot( def multiple_difference_plot( self, - level: Union[str, Tuple], + level: Union[str, Tuple, int], absolute: bool = True, - groupby: Union[str, Iterable] = None, - level_as_reference: bool = None, - non_inferiority_margins: NIM_TYPE = None, + groupby: Optional[Union[str, Iterable]] = None, + level_as_reference: Optional[bool] = None, + non_inferiority_margins: Optional[NIM_TYPE] = None, use_adjusted_intervals: bool = False, - final_expected_sample_size_column: str = None, + final_expected_sample_size_column: Optional[str] = None, split_plot_by_groups: bool = False, ) -> ChartGrid: difference_df = self.multiple_difference( @@ -289,6 +289,8 @@ def multiple_difference_plot( return chartgrid def sample_ratio_test(self, expected_proportions: Dict) -> Tuple[float, DataFrame]: + if self._denominator is None: + raise ValueError("Denominator is not set") return sample_ratio_test( self._df, all_group_columns=self._all_group_columns, @@ -320,7 +322,9 @@ def achieved_power(self, level_1, level_2, mde, alpha, groupby=None) -> DataFram """ return self._confidence_computer.achieved_power(level_1, level_2, mde, alpha, groupby) - def _validate_sequential(self, final_expected_sample_size: float, groupby: Union[str, Iterable]): + def _validate_sequential( + self, final_expected_sample_size: Optional[Union[str, float]], groupby: Optional[Union[str, Iterable]] + ): if final_expected_sample_size is not None: if self._ordinal_group_column not in listify(groupby): raise ValueError( diff --git a/spotify_confidence/analysis/frequentist/multiple_comparison.py b/spotify_confidence/analysis/frequentist/multiple_comparison.py index 97c855f..288ddbd 100644 --- a/spotify_confidence/analysis/frequentist/multiple_comparison.py +++ b/spotify_confidence/analysis/frequentist/multiple_comparison.py @@ -1,5 +1,5 @@ from _warnings import warn -from typing import Dict, Iterable +from typing import Any, List, Optional from pandas import DataFrame from statsmodels.stats.multitest import multipletests @@ -61,11 +61,11 @@ def get_num_comparisons( df: DataFrame, correction_method: str, number_of_level_comparisons: int, - groupby: Iterable, - metric_column: str, - treatment_column: str, + groupby: List[str], + metric_column: Optional[str], + 
treatment_column: Optional[str], single_metric: bool, - segments: Iterable, + segments: List[str], ) -> int: if correction_method == BONFERRONI: return max( @@ -132,7 +132,7 @@ def get_num_comparisons( raise ValueError(f"Unsupported correction method: {correction_method}.") -def add_adjusted_p_and_is_significant(df: DataFrame, **kwargs: Dict) -> DataFrame: +def add_adjusted_p_and_is_significant(df: DataFrame, **kwargs: Any) -> DataFrame: n_comparisons = kwargs[NUMBER_OF_COMPARISONS] if kwargs[FINAL_EXPECTED_SAMPLE_SIZE] is not None: if kwargs[CORRECTION_METHOD] not in [ @@ -203,7 +203,7 @@ def add_adjusted_p_and_is_significant(df: DataFrame, **kwargs: Dict) -> DataFram return df -def compute_sequential_adjusted_alpha(df: DataFrame, **kwargs: Dict) -> DataFrame: +def compute_sequential_adjusted_alpha(df: DataFrame, **kwargs: Any) -> DataFrame: if df[kwargs[METHOD]].isin([ZTEST, ZTESTLINREG]).all(): adjusted_alpha = confidence_computers[ZTEST].compute_sequential_adjusted_alpha(df, **kwargs) df = df.merge(adjusted_alpha, left_index=True, right_index=True) @@ -215,7 +215,7 @@ def compute_sequential_adjusted_alpha(df: DataFrame, **kwargs: Dict) -> DataFram raise NotImplementedError("Sequential testing is only supported for z-test and z-testlinreg") -def add_ci(df: DataFrame, **kwargs: Dict) -> DataFrame: +def add_ci(df: DataFrame, **kwargs: Any) -> DataFrame: lower, upper = confidence_computers[df[kwargs[METHOD]].values[0]].ci(df, ALPHA, **kwargs) if kwargs[CORRECTION_METHOD] in [ @@ -263,7 +263,7 @@ def add_ci(df: DataFrame, **kwargs: Dict) -> DataFrame: ) -def set_alpha_and_adjust_preference(df: DataFrame, **kwargs: Dict) -> DataFrame: +def set_alpha_and_adjust_preference(df: DataFrame, **kwargs: Any) -> DataFrame: alpha_0 = 1 - kwargs[INTERVAL_SIZE] return df.assign( **{ diff --git a/spotify_confidence/analysis/frequentist/sample_size_calculator.py b/spotify_confidence/analysis/frequentist/sample_size_calculator.py index 57fa031..29bb44b 100644 --- a/spotify_confidence/analysis/frequentist/sample_size_calculator.py +++ b/spotify_confidence/analysis/frequentist/sample_size_calculator.py @@ -1,4 +1,4 @@ -from typing import Iterable, Tuple, Union +from typing import Iterable, List, Optional, Tuple, Union from pandas import DataFrame @@ -41,7 +41,7 @@ def sample_size( mde_column: str, nim_column: str, preferred_direction_column: str, - final_expected_sample_size_column: str = None, + final_expected_sample_size_column: Optional[str] = None, ) -> DataFrame: """Args: treatment_weights (Iterable): The first weight is treated as control, the rest as treatment groups. @@ -68,7 +68,7 @@ def sample_size( def optimal_weights_and_sample_size( self, sample_size_df: DataFrame, number_of_groups: int - ) -> Tuple[Iterable, int]: + ) -> Tuple[List[float], Optional[float]]: """Args: sample_size_df (DataFrame): A data frame returned by the sample_size method of this class number_of_groups (int): Number of groups in the experiment, including control diff --git a/spotify_confidence/analysis/frequentist/sequential_bound_solver.py b/spotify_confidence/analysis/frequentist/sequential_bound_solver.py index 6bd3aa4..4d2f0c3 100644 --- a/spotify_confidence/analysis/frequentist/sequential_bound_solver.py +++ b/spotify_confidence/analysis/frequentist/sequential_bound_solver.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Optional + import numpy as np import pandas from scipy.stats import norm -def _alphas(alpha: np.array, phi: float, t: np.array): +def _alphas(alpha: float, phi: float, t: np.ndarray): """Alpha spending function.""" pe = np.zeros(len(t)) pd = np.zeros(len(t)) @@ -27,7 +29,7 @@ def _alphas(alpha: np.array, phi: float, t: np.array): return pe, pd -def _qp(xq: float, last: float, nints: int, yam1: float, ybm1: float, stdv: float): +def _qp(xq: float, last: np.ndarray, nints: int, yam1: float, ybm1: float, stdv: float): hlast = (ybm1 - yam1) / nints grid = np.linspace(yam1, ybm1, nints + 1) fun = last * norm.cdf(grid, xq, stdv) @@ -36,13 +38,13 @@ def _qp(xq: float, last: float, nints: int, yam1: float, ybm1: float, stdv: floa def _bsearch( - last: np.array, + last: np.ndarray, nints: int, pd: float, stdv: float, ya: float, yb: float, -) -> np.array: +) -> float: """ Note: function signature slightly modified in comparison to R implementation (which takes complete nints array instead of scalar), but should be semantically equivalent @@ -73,13 +75,13 @@ def _bsearch( _NORM_CONSTANT = 1 / np.sqrt(2 * np.pi) -def _fast_norm_pdf_prescaled(x: np.array, scale): +def _fast_norm_pdf_prescaled(x: np.ndarray, scale): norm_constant2 = _NORM_CONSTANT / scale pdf_val = norm_constant2 * np.exp(-0.5 * np.power(x, 2)) return pdf_val -def _fcab(last: np.array, nints: int, yam1: float, h: float, x: np.array, stdv: float): +def _fcab(last: np.ndarray, nints: int, yam1: float, h: float, x: np.ndarray, stdv: float): X, Y = np.meshgrid(x / stdv, (h * np.linspace(0, nints, nints + 1) + yam1) / stdv) scaled_x = Y - X pdf_prescaled = _fast_norm_pdf_prescaled(scaled_x, stdv) @@ -100,7 +102,7 @@ class ComputationState: structure may be changed anytime. """ - def __init__(self, df: pandas.DataFrame, last_fcab: np.array): + def __init__(self, df: pandas.DataFrame, last_fcab: Optional[np.ndarray]): if df is None or any(df["zb"].isnull()) or len(df) > 0 and last_fcab is None: raise ValueError() @@ -121,17 +123,17 @@ def last_fcab(self): def __eq__(self, other): if isinstance(other, ComputationState): - return self._df.equals(other._df) and np.array_equal(self._last_fcab, other._last_fcab) + return self._df.equals(other._df) and np.array_equal(self._last_fcab, other._last_fcab) # type: ignore[arg-type,unused-ignore] return False def landem( - t: np.array, + t: np.ndarray, alpha: float, phi: float, ztrun: float, state: ComputationState, - max_nints: int = None, + max_nints: Optional[int] = None, ): """ This function is a Python implementation of landem.R of ldbounds package. 
@@ -310,20 +312,20 @@ def state(self): return self._state -columns = ["za", "zb", "ya", "yb", "pe", "pd", "stdv", "sdproc", "nints", "information_ratio"] +_BOUND_COLUMNS = pandas.Index(["za", "zb", "ya", "yb", "pe", "pd", "stdv", "sdproc", "nints", "information_ratio"]) # Initial state to be fed into bounds() to calculate sequential bounds from scratch -EMPTY_STATE = ComputationState(df=pandas.DataFrame(index=None, columns=columns, dtype=float), last_fcab=None) +EMPTY_STATE = ComputationState(df=pandas.DataFrame(index=None, columns=_BOUND_COLUMNS, dtype=float), last_fcab=None) def bounds( - t: np.array, + t: np.ndarray, alpha: float, rho: float, ztrun: float, sides: int, - state: ComputationState = EMPTY_STATE, - max_nints=None, + state: Optional[ComputationState] = None, + max_nints: Optional[int] = None, ) -> CalculationResult: """ See landem() for parameter explanation @@ -332,9 +334,11 @@ def bounds( """ def get_input_str(): + state_df_str = state.df.to_json() if state is not None else "None" + state_fcab_str = state.last_fcab if state is not None else "None" return ( f"input params: t={t}, alpha={alpha}, sides={sides}, rho={rho}, ztrun={ztrun}," - f"state_df={state.df.to_json()}, state_fcab={state.last_fcab}, max_nints={max_nints}" + f"state_df={state_df_str}, state_fcab={state_fcab_str}, max_nints={max_nints}" ) if any(t == 0.0): diff --git a/spotify_confidence/analysis/frequentist/t_test.py b/spotify_confidence/analysis/frequentist/t_test.py index 37da0d2..c1460bf 100644 --- a/spotify_confidence/analysis/frequentist/t_test.py +++ b/spotify_confidence/analysis/frequentist/t_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, Union +from typing import Iterable, Optional, Union from pandas import DataFrame @@ -33,8 +33,8 @@ def __init__( ordinal_group_column: Union[str, None] = None, interval_size: float = 0.95, correction_method: str = BONFERRONI, - confidence_computer: ConfidenceComputerABC = None, - confidence_grapher: ConfidenceGrapherABC = None, + confidence_computer: Optional[ConfidenceComputerABC] = None, + confidence_grapher: Optional[ConfidenceGrapherABC] = None, metric_column: Union[str, None] = None, treatment_column: Union[str, None] = None, ): diff --git a/spotify_confidence/analysis/frequentist/z_test.py b/spotify_confidence/analysis/frequentist/z_test.py index 69ba8b2..e64115c 100644 --- a/spotify_confidence/analysis/frequentist/z_test.py +++ b/spotify_confidence/analysis/frequentist/z_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Iterable, Union +from typing import Iterable, Optional, Union from pandas import DataFrame @@ -33,8 +33,8 @@ def __init__( ordinal_group_column: Union[str, None] = None, interval_size: float = 0.95, correction_method: str = BONFERRONI, - confidence_computer: ConfidenceComputerABC = None, - confidence_grapher: ConfidenceGrapherABC = None, + confidence_computer: Optional[ConfidenceComputerABC] = None, + confidence_grapher: Optional[ConfidenceGrapherABC] = None, metric_column: Union[str, None] = None, treatment_column: Union[str, None] = None, power: float = 0.8, diff --git a/spotify_confidence/analysis/frequentist/z_test_linreg.py b/spotify_confidence/analysis/frequentist/z_test_linreg.py index b4c8ab1..6e4606b 100644 --- a/spotify_confidence/analysis/frequentist/z_test_linreg.py +++ b/spotify_confidence/analysis/frequentist/z_test_linreg.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, Union +from typing import Iterable, Optional, Union from pandas import DataFrame @@ -40,8 +40,8 @@ def __init__( interval_size: float = 0.95, power: float = 0.8, correction_method: str = BONFERRONI, - confidence_computer: ConfidenceComputerABC = None, - confidence_grapher: ConfidenceGrapherABC = None, + confidence_computer: Optional[ConfidenceComputerABC] = None, + confidence_grapher: Optional[ConfidenceGrapherABC] = None, ): super().__init__( data_frame=data_frame.assign(**{METHOD_COLUMN_NAME: "z-test-linreg"}), diff --git a/spotify_confidence/chartgrid.py b/spotify_confidence/chartgrid.py index 6a2e636..6bfb813 100644 --- a/spotify_confidence/chartgrid.py +++ b/spotify_confidence/chartgrid.py @@ -13,7 +13,7 @@ # limitations under the License. # TODO: Move this to chartify. -from typing import Iterable +from typing import Any, List, Optional class ChartGrid: @@ -24,10 +24,10 @@ class ChartGrid: - .show(): Render all the charts. 
""" - def __init__(self, charts: Iterable = None): + def __init__(self, charts: Optional[List[Any]] = None): if charts is None: charts = [] - self.charts = charts + self.charts: List[Any] = charts def show(self, format="html"): for chart in self.charts: diff --git a/tests/bayesian/test_betabinomial.py b/tests/bayesian/test_betabinomial.py index 9f621b6..626642d 100644 --- a/tests/bayesian/test_betabinomial.py +++ b/tests/bayesian/test_betabinomial.py @@ -10,6 +10,8 @@ class TestCategorical(object): + test: spotify_confidence.BetaBinomial + def setup_method(self): self.data = pd.DataFrame( { @@ -103,7 +105,7 @@ def test_multiple_difference_joint(self): self.test.multiple_difference_joint(("bad_value", "bad_value")) diff = self.test.multiple_difference_joint(("us", "test2")) - print(np.random.get_state()[1][0]) + print(np.random.get_state()[1][0]) # type: ignore assert np.allclose(diff["difference"], np.array([0.000743]), rtol=1e-05, atol=1e-06) assert np.allclose(diff["P(('us', 'test2') >= all)"], np.array([0.508644])) assert np.allclose(diff["('us', 'test2') potential loss"], np.array([-0.032459])) diff --git a/tests/frequentist/test_chisquared.py b/tests/frequentist/test_chisquared.py index 68c5320..c132c0e 100644 --- a/tests/frequentist/test_chisquared.py +++ b/tests/frequentist/test_chisquared.py @@ -9,6 +9,7 @@ from spotify_confidence.analysis.confidence_utils import power_calculation from spotify_confidence.analysis.constants import DENOMINATOR, POINT_ESTIMATE, SFX1, SFX2, VARIANCE from spotify_confidence.analysis.frequentist.confidence_computers import chi_squared_computer as computer +from spotify_confidence.analysis.frequentist.confidence_computers.confidence_computer import ConfidenceComputer def chart_data(chart_object, series_name): @@ -24,6 +25,8 @@ def chart_data(chart_object, series_name): class TestCategorical(object): + test: spotify_confidence.ChiSquared + def setup_method(self): np.random.seed(123) @@ -140,6 +143,7 @@ def test_difference(self): assert np.allclose(diff["p-value"], np.array([0.155218, 0.074866])) def test_difference_with_interval_sizes(self): + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._interval_size = 0.99 diff = self.test.difference(("us", "control"), ("us", "test")) assert np.allclose(diff["difference"].iloc[0], 0.091694) @@ -248,7 +252,9 @@ def test_differences_plot(self): def test_multiple_difference_plot(self): with pytest.raises(ValueError): self.test.multiple_difference_plot( - ("bad_value", "bad_value"), ("bad_value", "bad_value"), level_as_reference=False + ("bad_value", "bad_value"), + ("bad_value", "bad_value"), # type: ignore[arg-type] + level_as_reference=False, ) ch = self.test.multiple_difference_plot(("us", "control"), level_as_reference=False) @@ -268,7 +274,7 @@ def test_sample_ratio_test(self): } with pytest.raises(TypeError): - self.test.sample_ratio_test("not a dict") + self.test.sample_ratio_test("not a dict") # type: ignore[arg-type] with pytest.raises(ValueError): negative = expected.copy() @@ -439,7 +445,7 @@ def test_p_value_is_symmetric(self): def test_raise_error_with_nim(self): with pytest.raises(ValueError): - self.test.difference(("control", 1), ("test", 1), non_inferiority_margins=("blah", "hah")) + self.test.difference(("control", 1), ("test", 1), non_inferiority_margins=("blah", "hah")) # type: ignore[arg-type] def test_difference(self): with pytest.raises(ValueError): diff --git a/tests/frequentist/test_experiment.py b/tests/frequentist/test_experiment.py index 
1831094..4d6bbcc 100644 --- a/tests/frequentist/test_experiment.py +++ b/tests/frequentist/test_experiment.py @@ -206,16 +206,18 @@ def test_multiple_differences_plot_some_nims_doesnt_raise_exception(self, nims): assert False, f"Using non_inferiority_margins={nims} raised an exception: {e}." def get_experiment_with_some_nims(self): - columns = [ - "group_name", - "num_user", - "sum", - "sum_squares", - "method", - "metric", - "preferred_direction", - "non_inferiority_margin", - ] + columns = pd.Index( + [ + "group_name", + "num_user", + "sum", + "sum_squares", + "method", + "metric", + "preferred_direction", + "non_inferiority_margin", + ] + ) data = [ ["Control", 6267728, 3240932, 3240932, "z-test", "m1", "increase", 0.15], ["Test", 6260737, 3239706, 3239706, "z-test", "m1", "increase", 0.15], diff --git a/tests/frequentist/test_freqsamplesizecalculator.py b/tests/frequentist/test_freqsamplesizecalculator.py index 2964b8e..77eaedb 100644 --- a/tests/frequentist/test_freqsamplesizecalculator.py +++ b/tests/frequentist/test_freqsamplesizecalculator.py @@ -19,7 +19,7 @@ class TestSampleSizeCalculator(object): def test_sample_size_1(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ ["share_bananas_1d", True, 0.7, 0.21, 0.00617, None, "increase"], ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, "increase"], @@ -50,6 +50,7 @@ def test_sample_size_1(self): assert ss[CI_WIDTH].isna().all() optimal_weights, optimal_sample_size = ssc.optimal_weights_and_sample_size(ss, len(treatment_weights)) + assert optimal_sample_size is not None assert len(optimal_weights) == len(treatment_weights) assert 0.999 < optimal_sample_size / 894863 < 1.001 @@ -67,7 +68,7 @@ def test_sample_size_1(self): def test_sample_size_2(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ ["share_bananas_1d", True, 0.7, 0.21, 0.00617, None, None], ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, "increase"], @@ -98,6 +99,7 @@ def test_sample_size_2(self): assert ss[CI_WIDTH].isna().all() optimal_weights, optimal_sample_size = ssc.optimal_weights_and_sample_size(ss, len(treatment_weights)) + assert optimal_sample_size is not None assert len(optimal_weights) == len(treatment_weights) assert 0.999 < optimal_sample_size / 1004113 < 1.001 @@ -115,7 +117,7 @@ def test_sample_size_2(self): def test_sample_size_3(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ ["share_bananas_1d", True, 0.7, 0.21, 0.00617, None, None], ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, "increase"], @@ -146,6 +148,7 @@ def test_sample_size_3(self): assert ss[CI_WIDTH].isna().all() optimal_weights, optimal_sample_size = ssc.optimal_weights_and_sample_size(ss, len(treatment_weights)) + assert optimal_sample_size is not None assert len(optimal_weights) == len(treatment_weights) assert 0.999 < optimal_sample_size / 596991 < 1.001 @@ -163,7 +166,7 @@ def test_sample_size_3(self): def test_sample_size_4(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ 
["share_bananas_1d", True, 0.7, 0.21, None, 0.00617, "increase"], ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, "increase"], @@ -194,6 +197,7 @@ def test_sample_size_4(self): assert ss[CI_WIDTH].isna().all() optimal_weights, optimal_sample_size = ssc.optimal_weights_and_sample_size(ss, len(treatment_weights)) + assert optimal_sample_size is not None assert len(optimal_weights) == len(treatment_weights) assert 0.999 < optimal_sample_size / 586168 < 1.001 @@ -211,7 +215,7 @@ def test_sample_size_4(self): def test_sample_size_5(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ ["share_bananas_1d", True, 0.7, 0.21, None, 0.00617, "increase"], ], @@ -240,6 +244,7 @@ def test_sample_size_5(self): assert ss[CI_WIDTH].isna().all() optimal_weights, optimal_sample_size = ssc.optimal_weights_and_sample_size(ss, len(treatment_weights)) + assert optimal_sample_size is not None assert len(optimal_weights) == len(treatment_weights) assert 0.999 < optimal_sample_size / 451934 < 1.001 @@ -257,7 +262,7 @@ def test_sample_size_5(self): def test_sample_size_6(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, None], ], @@ -286,6 +291,7 @@ def test_sample_size_6(self): assert ss[CI_WIDTH].isna().all() optimal_weights, optimal_sample_size = ssc.optimal_weights_and_sample_size(ss, len(treatment_weights)) + assert optimal_sample_size is not None assert len(optimal_weights) == len(treatment_weights) assert 0.999 < optimal_sample_size / 47854 < 1.001 @@ -303,7 +309,7 @@ def test_sample_size_6(self): def test_sample_size_7(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ ["share_bananas_1d", True, 0.7, 0.21, 0.00617, None, None], ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, "increase"], @@ -334,6 +340,7 @@ def test_sample_size_7(self): assert ss[CI_WIDTH].isna().all() optimal_weights, optimal_sample_size = ssc.optimal_weights_and_sample_size(ss, len(treatment_weights)) + assert optimal_sample_size is not None assert len(optimal_weights) == len(treatment_weights) assert 0.999 < optimal_sample_size / 556565 < 1.001 @@ -351,7 +358,7 @@ def test_sample_size_7(self): def test_sample_size_8(self): df = pd.DataFrame( - columns=["metric_name", "country", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "country", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ ["share_bananas_1d", "denmark", True, 0.7, 0.21, 0.01, None, None], ["share_bananas_1d", "sweden", True, 0.4, 0.24, 0.01, None, None], @@ -400,7 +407,9 @@ def test_sample_size_8(self): def test_sample_size_calculation_ciwidth_nimless_with_expected_sample_size(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference", "expected_sample_size"], + columns=pd.Index( + ["metric_name", "binary", "avg", "var", "mde", "nim", "preference", "expected_sample_size"] + ), data=[ ["share_bananas_1d", True, 0.7, 0.21, None, 0.0, "increase", 1e6], ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, "increase", int(1e6)], @@ -447,7 +456,9 @@ def 
test_sample_size_calculation_ciwidth_nimless_with_expected_sample_size(self) def test_sample_size_calculation_ciwidth_matches_real_width_returned_by_onesided_test(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference", "expected_sample_size"], + columns=pd.Index( + ["metric_name", "binary", "avg", "var", "mde", "nim", "preference", "expected_sample_size"] + ), data=[ ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, "increase", int(1e6)], ], @@ -490,7 +501,9 @@ def test_sample_size_calculation_ciwidth_matches_real_width_returned_by_onesided def test_sample_size_calculation_ciwidth_matches_real_width_returned_by_twosided_test_with_direction(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference", "expected_sample_size"], + columns=pd.Index( + ["metric_name", "binary", "avg", "var", "mde", "nim", "preference", "expected_sample_size"] + ), data=[ ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, None, int(1e6)], ], @@ -535,7 +548,7 @@ def test_ci_bound_equal_nim(self): mde_pp = 0.15 baseline = 0.48 df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ [ "share_users_with_bananas", @@ -620,7 +633,7 @@ def test_ci_bound_equal_nim_2(self): mde_pp = 0.15 baseline = 0.48 df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ [ "share_users_with_bananas", @@ -707,7 +720,7 @@ def test_ci_bound_equal_mde(self): mde_pp = 0.15 baseline = 0.48 df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ [ "share_users_with_bananas", @@ -793,7 +806,7 @@ def test_ci_bound_equal_mde(self): def test_sample_size_with_nan(self): df = pd.DataFrame( - columns=["metric_name", "binary", "avg", "var", "mde", "nim", "preference"], + columns=pd.Index(["metric_name", "binary", "avg", "var", "mde", "nim", "preference"]), data=[ ["share_bananas_1d", True, None, None, 0.00617, None, "increase"], ["bananas_per_user_7d", False, 4.56, 2.13, 0.01, None, "increase"], diff --git a/tests/frequentist/test_ttest.py b/tests/frequentist/test_ttest.py index 02a15fb..fc83e53 100644 --- a/tests/frequentist/test_ttest.py +++ b/tests/frequentist/test_ttest.py @@ -21,6 +21,7 @@ VARIANCE, ) from spotify_confidence.analysis.frequentist.confidence_computers import t_test_computer as computer +from spotify_confidence.analysis.frequentist.confidence_computers.confidence_computer import ConfidenceComputer def chart_data(chart_object, series_name): @@ -188,6 +189,7 @@ def test_difference_with_interval_sizes(self): https: // www.quantitativeskills.com / sisa / statistics / t - test.htm was used to validate results """ + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._interval_size = 0.99 diff = self.test.difference(("us", "control"), ("us", "test")) @@ -226,7 +228,9 @@ def test_multiple_difference(self): def test_multiple_difference_plot(self): with pytest.raises(ValueError): self.test.multiple_difference_plot( - ("bad_value", "bad_value"), ("bad_value", "bad_value"), level_as_reference=False + ("bad_value", "bad_value"), + ("bad_value", "bad_value"), # type: ignore[arg-type] + 
level_as_reference=False, ) ch = self.test.multiple_difference_plot(("us", "control"), level_as_reference=False) @@ -374,7 +378,9 @@ def test_difference_plot(self): def test_multiple_difference(self): with pytest.raises(ValueError): self.test.multiple_difference( - ("bad_value", "bad_value"), ("bad_value", "bad_value"), level_as_reference=False + ("bad_value", "bad_value"), + ("bad_value", "bad_value"), # type: ignore[arg-type] + level_as_reference=False, ) diff = self.test.multiple_difference(("control", 1), level_as_reference=False) @@ -386,7 +392,9 @@ def test_multiple_difference(self): def test_multiple_difference_plot(self): with pytest.raises(ValueError): self.test.multiple_difference_plot( - ("bad_value", "bad_value"), ("bad_value", "bad_value"), level_as_reference=False + ("bad_value", "bad_value"), + ("bad_value", "bad_value"), # type: ignore[arg-type] + level_as_reference=False, ) ch = self.test.multiple_difference_plot(("control", 1), level_as_reference=False) diff --git a/tests/frequentist/test_ztest.py b/tests/frequentist/test_ztest.py index 8ca1b84..44cc0f6 100644 --- a/tests/frequentist/test_ztest.py +++ b/tests/frequentist/test_ztest.py @@ -23,6 +23,7 @@ REQUIRED_SAMPLE_SIZE, SPOT_1, ) +from spotify_confidence.analysis.frequentist.confidence_computers.confidence_computer import ConfidenceComputer class TestPoweredEffectContinuousSingleMetric(object): @@ -1507,6 +1508,7 @@ def test_summary(self): @pytest.mark.parametrize("correction_method", CORRECTION_METHODS, ids=lambda x: f"correction method: {x}") def test_difference(self, correction_method): + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method if BONFERRONI in correction_method: difference_df = self.test.difference(level_1=("control", "gb", 1), level_2=("test", "us", 2)) @@ -1519,6 +1521,7 @@ def test_difference(self, correction_method): @pytest.mark.parametrize("correction_method", CORRECTION_METHODS, ids=lambda x: f"correction method: {x}") def test_difference_groupby(self, correction_method): + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method if BONFERRONI in correction_method: difference_df = self.test.difference( @@ -1536,6 +1539,7 @@ def test_difference_groupby(self, correction_method): @pytest.mark.parametrize("correction_method", CORRECTION_METHODS, ids=lambda x: f"correction method: {x}") def test_multiple_difference(self, correction_method): + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method if correction_method in [ BONFERRONI, @@ -1595,6 +1599,7 @@ def test_multiple_difference(self, correction_method): @pytest.mark.parametrize("correction_method", CORRECTION_METHODS, ids=lambda x: f"correction method: {x}") def test_multiple_difference_groupby(self, correction_method): + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method if correction_method in [ BONFERRONI, @@ -1651,6 +1656,7 @@ def test_multiple_difference_groupby(self, correction_method): @pytest.mark.parametrize("correction_method", CORRECTION_METHODS, ids=lambda x: f"correction method: {x}") def test_difference_with_nims(self, correction_method): + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method df = 
self.test.difference( level_1=("test", "us"), @@ -1718,6 +1724,7 @@ def test_difference_with_nims(self, correction_method): @pytest.mark.parametrize("correction_method", CORRECTION_METHODS, ids=lambda x: f"correction method: {x}") def test_differece_with_nims_in_df(self, correction_method): + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method df = self.test.difference( @@ -2048,6 +2055,7 @@ def test_compare_series_non_inferiority_improve_postitive(self, correction_metho np.testing.assert_almost_equal(diff[CI_LOWER].values[0], -0.0723, 3) assert diff[P_VALUE].values[0] > 0.01 + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method diff_2 = self.test.difference(level_1="1", level_2="2", non_inferiority_margins=(0.02, "increase")) @@ -2072,6 +2080,7 @@ def test_compare_series_non_inferiority_improve_negative(self, correction_method np.testing.assert_almost_equal(diff[CI_UPPER].values[0], 0.0207, 3) assert diff[P_VALUE].values[0] < 0.01 + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method diff_2 = self.test.difference(level_1="1", level_2="2", non_inferiority_margins=(0.02, "decrease")) @@ -2506,6 +2515,7 @@ def setup_method(self): @pytest.mark.parametrize("correction_method", CORRECTION_METHODS, ids=lambda x: f"correction method: {x}") def test_multiple_difference_groupby(self, correction_method): + assert isinstance(self.test._confidence_computer, ConfidenceComputer) self.test._confidence_computer._correction_method = correction_method def get_diff() -> pd.DataFrame: diff --git a/tox.ini b/tox.ini index cb42e5a..3ce01bd 100644 --- a/tox.ini +++ b/tox.ini @@ -8,6 +8,7 @@ dependency_groups = dev commands = ruff check ruff format --check + ty check pytest -n auto --no-cov --basetemp={envtmpdir} {posargs} [testenv:py39-min] @@ -23,4 +24,5 @@ deps = commands = ruff check ruff format --check + ty check pytest -n auto --no-cov --basetemp={envtmpdir} {posargs}