Add ty type checker and fix errors #114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

elbersb wants to merge 18 commits into spotify:master from elbersb:ty

CLAUDE.md

-Original file line number
+Diff line change
@@ Expand Up / @@ -16,6 +16,10 @@ uv pip install -e . --group dev @@
     ### Testing
     ```bash
+    # IMPORTANT: Run all tests across Python versions
+    # to make sure all code changes work on older Python versions
+    uv run tox -p auto
     # Run all tests with coverage
     uv run pytest
@@ Expand All / @@ -27,9 +31,6 @@ uv run pytest tests/frequentist/test_z_test.py @@
     # Run specific test
     uv run pytest tests/frequentist/test_z_test.py::test_name
-    # Run all tests across Python versions
-    uv run tox
     ```
     ### Code Quality
@@ Expand All / @@ -40,8 +41,11 @@ uv run ruff check @@
     # Run formatting
     uv run ruff format
+    # Run type checking
+    uv run ty check
     # Run all quality checks (as done in CI)
-    uv run ruff check && uv run ruff format && uv run pytest
+    uv run ruff check && uv run ruff format --check && uv run ty check && uv run pytest
     ```
     ### Build
@@ Expand Down Expand Up @@
     ## Code Style
-    Uses ruff linting and formatting.
+    - **Linting & Formatting**: Uses [ruff](https://github.com/astral-sh/ruff) for code linting and formatting
+    - **Type Checking**: Uses [ty](https://github.com/astral-sh/ty) (Astral's fast Python type checker) for static type analysis
+    - Both tools are configured in `pyproject.toml` and exclude the `examples/` directory

CONTRIBUTING.rst

-Original file line number
+Diff line change
@@ Expand Up @@
         $ uv run ruff format       # Format code
         $ uv run ruff check        # Lint code
+        $ uv run ty check          # Type check code
         $ uv run pytest            # Run tests
        To test across all supported Python versions (3.9, 3.10, 3.11, 3.12)::
@@ Expand Down @@

pyproject.toml

-Original file line number
+Diff line change
@@ Expand Up / @@ -35,6 +35,7 @@ dev = [ @@
         "pytest-cov>=4.0.0",
         "pytest-xdist>=3.0.2",
         "coverage>=7.0.0",
+        "ty>=0.0.11",
     ]
     [project.urls]
@@ Expand All / @@ -53,6 +54,12 @@ extend-exclude = ["examples/"] @@
     [tool.ruff.format]
     quote-style = "double"
+    [tool.ty.src]
+    exclude = ["examples/"]
+    [tool.ty.rules]
+    unused-ignore-comment = "ignore"
     [tool.pytest.ini_options]
     addopts = "-v -n auto --cov=spotify_confidence --cov-report=html --cov-report=xml --cov-report=term-missing"
     testpaths = ["tests"]
@@ Expand Down @@

spotify_confidence/analysis/abstract_base_classes/confidence_computer_abc.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -13,7 +13,7 @@
  
    # limitations under the License.

    from abc import ABC, abstractmethod

    from typing import Iterable, List, Tuple, Union

    from typing import Iterable, List, Optional, Tuple, Union

    from pandas import DataFrame

    @@ -32,11 +32,11 @@ def compute_difference(
  
            level_1: Union[str, Iterable],

            level_2: Union[str, Iterable],

            absolute: bool,

            groupby: Union[str, Iterable],

            nims: NIM_TYPE,

            final_expected_sample_size_column: str,

            groupby: Optional[Union[str, Iterable]],

            nims: Optional[NIM_TYPE],

            final_expected_sample_size_column: Optional[str],

            verbose: bool,

            mde_column: str,

            mde_column: Optional[str],

        ) -> DataFrame:

            """Return dataframe containing the difference in means between

            group 1 and 2, p-value and confidence interval

    @@ -46,42 +46,44 @@ def compute_difference(
  
        @abstractmethod

        def compute_multiple_difference(

            self,

            level: Union[str, Iterable],

            level: Union[str, Iterable, int],

            absolute: bool,

            groupby: Union[str, Iterable],

            level_as_reference: bool,

            nims: NIM_TYPE,

            final_expected_sample_size_column: str,

            groupby: Optional[Union[str, Iterable]],

            level_as_reference: Optional[bool],

            nims: Optional[NIM_TYPE],

            final_expected_sample_size_column: Optional[str],

            verbose: bool,

            mde_column: str,

            mde_column: Optional[str],

        ) -> DataFrame:

            """Return dataframe containing the difference in means between

            level and all other groups, with p-value and confidence interval

            """

            pass

        @abstractmethod

        def compute_differences(

            self,

            levels: List[Tuple],

            levels: Union[Tuple, List[Tuple]],

            absolute: bool,

            groupby: Union[str, Iterable],

            nims: NIM_TYPE,

            final_expected_sample_size_column: str,

            groupby: Optional[Union[str, Iterable]],

            nims: Optional[NIM_TYPE],

            final_expected_sample_size_column: Optional[str],

            verbose: bool,

            mde_column: str,

            mde_column: Optional[str],

        ) -> DataFrame:

            """Return dataframe containing the difference in means between

            level and all other groups, with p-value and confidence interval

            """

            pass

        @abstractmethod

        def achieved_power(

            self,

            level_1: Union[str, Iterable],

            level_2: Union[str, Iterable],

            mde: float,

            alpha: float,

            groupby: Union[str, Iterable],

            groupby: Optional[Union[str, Iterable]],

        ) -> DataFrame:

            """Calculate the achieved power of the test of differences between

            level 1 and level 2 given a targeted MDE.

spotify_confidence/analysis/abstract_base_classes/confidence_grapher_abc.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -13,7 +13,7 @@
  
    # limitations under the License.

    from abc import ABC, abstractmethod

    from typing import Iterable, Union

    from typing import Iterable, Optional, Union

    from pandas import DataFrame

    @@ -29,13 +29,13 @@ def __init__(
  
            data_frame: DataFrame,

            numerator_column: str,

            denominator_column: str,

            categorical_group_columns: str,

            ordinal_group_column: str,

            categorical_group_columns: Union[str, Iterable],

            ordinal_group_column: Optional[str],

        ):

            pass

        @abstractmethod

        def plot_summary(self, summary_df: DataFrame, groupby: Union[str, Iterable]) -> ChartGrid:

        def plot_summary(self, summary_df: DataFrame, groupby: Optional[Union[str, Iterable]]) -> ChartGrid:

            """Plot for each group in the data_frame:

            if ordinal level exists:

    @@ -57,8 +57,8 @@ def plot_difference(
  
            self,

            difference_df: DataFrame,

            absolute: bool,

            groupby: Union[str, Iterable],

            nims: NIM_TYPE,

            groupby: Optional[Union[str, Iterable]],

            nims: Optional[NIM_TYPE],

            use_adjusted_intervals: bool,

            split_plot_by_groups: bool,

        ) -> ChartGrid:

    @@ -79,8 +79,8 @@ def plot_differences(
  
            self,

            difference_df: DataFrame,

            absolute: bool,

            groupby: Union[str, Iterable],

            nims: NIM_TYPE,

            groupby: Optional[Union[str, Iterable]],

            nims: Optional[NIM_TYPE],

            use_adjusted_intervals: bool,

            split_plot_by_groups: bool,

        ) -> ChartGrid:

    @@ -101,9 +101,9 @@ def plot_multiple_difference(
  
            self,

            difference_df: DataFrame,

            absolute: bool,

            groupby: Union[str, Iterable],

            level_as_reference: bool,

            nims: NIM_TYPE,

            groupby: Optional[Union[str, Iterable]],

            level_as_reference: Optional[bool],

            nims: Optional[NIM_TYPE],

            use_adjusted_intervals: bool,

            split_plot_by_groups: bool,

        ) -> ChartGrid:

spotify_confidence/analysis/bayesian/bayesian_base.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -27,7 +27,7 @@ @@
     # warnings.simplefilter("once")
-    INITIAL_RANDOMIZATION_SEED = np.random.get_state()[1][0]
+    INITIAL_RANDOMIZATION_SEED = np.random.get_state()[1][0]  # type: ignore[index]
     def num_decimals(value: float, absolute: bool) -> int:
@@ Expand Down Expand Up / @@ -114,6 +114,11 @@ def __init__( @@
             self._all_group_columns = [column for column in self._all_group_columns if column is not None]
             self._validate_data()
+        @abstractmethod
+        def _interval(self, row):
+            """Return confidence/credible interval for a row. Must be implemented by subclasses."""
+            pass
         def _validate_data(self):
             """Integrity check input dataframe."""
             if not self._all_group_columns:
@@ Expand All / @@ -137,36 +142,6 @@ def _validate_data(self): @@
         Must be number or datetime type.""".format(ordinal_column_type)
                     )
-        @classmethod
-        def as_cumulative(
-            cls, data_frame, numerator_column, denominator_column, ordinal_group_column, categorical_group_columns=None
-        ):
-            """
-            Instantiate the class with a cumulative representation of the dataframe.
-            Sorts by the ordinal variable and calculates the cumulative sum
-            May be used for to visualize the difference between groups as a
-            time series.
-            Args:
-               data_frame (pd.DataFrame): DataFrame
-               numerator_column (str): Column name for numerator column.
-               denominator_column (str): Column name for denominator column.
-               ordinal_group_column (str): Column name for ordinal grouping
-                   (e.g. numeric or date values).
-               categorical_group_columns (str or list),
-                   Optional: Column names for categorical groupings.
-            """
-            sorted_df = data_frame.sort_values(by=ordinal_group_column)
-            cumsum_cols = [numerator_column, denominator_column]
-            if categorical_group_columns:
-                sorted_df[cumsum_cols] = sorted_df.groupby(by=categorical_group_columns)[cumsum_cols].cumsum()
-            else:
-                sorted_df[cumsum_cols] = sorted_df[cumsum_cols].cumsum()
-            return cls(sorted_df, numerator_column, denominator_column, categorical_group_columns, ordinal_group_column)
         def summary(self):
             """Return Pandas DataFrame with summary statistics."""
             return self._summary(self._data_frame, self._interval)
@@ Expand Down Expand Up @@
             return results_data_frame
-        def _all_groups(self):
-            """Return a list of all group keys.
-            Returns: list"""
-            groups = list(self._data_frame.groupby(self._all_group_columns).groups.keys())
-            return groups
         def _add_group_by_columns(self, difference_df, groupby, level_name):
             if groupby:
                 groupby = groupby[0] if len(groupby) == 1 else groupby
@@ Expand All @@
                 else:
                     for col, val in zip(groupby, level_name):
                         difference_df.insert(0, column=col, value=val)
-    # class BinomialResponse(BaseTest, metaclass=ABCMeta):
-    #     """Binomial Response Variable.
-    #     """
-    # class GaussianResponse(BaseTest, metaclass=ABCMeta):
-    #     """Base class for tests of normal response variables
-    #     E.g. Revenue per user
-    #     """
-    #     pass
-    # class PoissonResponse(BaseTest, metaclass=ABCMeta):
-    #     """Base class for tests of poisson response variables.
-    #     E.g. # of days active per user per month
-    #     """
-    #     pass
-    # class MultinomialResponse(BaseTest, metaclass=ABCMeta):
-    #     """Base class for tests of multinomial response variables.
-    #     E.g. single choice answer survey
-    #         self.
-    #     """
-    #     def __init__(self, data_frame, categorical_group_columns,
-    #                  ordinal_group_column, category_column, value_column):
-    #         self._category_column = category_column
-    #         self._value_column = value_column
-    #         super().__init__(data_frame, categorical_group_columns,
-    #                          ordinal_group_column)
-    # class CategoricalResponse(BaseTest, metaclass=ABCMeta):
-    #     """Base class for tests of categorical response variables.
-    #     E.g. multiple choice answer survey
-    #     """
-    #     def __init__(self, data_frame, categorical_group_columns,
-    #                  ordinal_group_column, category_column, value_column):
-    #         self._category_column = category_column
-    #         self._value_column = value_column
-    #         super().__init__(data_frame, categorical_group_columns,
-    #                          ordinal_group_column)
-    #     pass

spotify_confidence/analysis/bayesian/bayesian_models.py

-Original file line number
+Diff line change
@@ Expand Up @@
             )
             return results_df
-    # class GammaPoisson(PoissonResponse):
-    #     pass
-    # class DirichetMultinomial(MultinomialResponse):
-    #     def __init__(self,
-    #                  data_frame,
-    #                  group_columns,
-    #                  category_column,
-    #                  value_column,
-    #                  prior_value_column=None):
-    #         super().__init__(data_frame, group_columns, category_column,
-    #                          value_column)
-    # class Gaussian(GaussianResponse):
-    #     def __init__(self,
-    #                  data_frame,
-    #                  groupings,
-    #                  mean_col,
-    #                  std_col,
-    #                  n_col,
-    #                  time_grouping=None,
-    #                  prior_columns=None):
-    #         self.prior_lambda_column = prior_lambda_column
-    #         super(BaseGaussianResponse, self).__init__(
-    #             data_frame, groups, mean_col, std_col, n_col, time_grouping)
-    #         raise (NotImplementedError)
-    # class DirichetCategorical(CategoricalResponse):
-    #     pass

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add ty type checker and fix errors #114

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Add ty type checker and fix errors #114

Are you sure you want to change the base?

Uh oh!

Add ty type checker and fix errors #114

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!