diff --git a/.circleci/config.yml b/.circleci/config.yml
index e704c37df3e45..276ff2ee8314a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -6,7 +6,7 @@ jobs:
image: ubuntu-2004:2022.04.1
resource_class: arm.large
environment:
- ENV_FILE: ci/deps/circle-38-arm64.yaml
+ ENV_FILE: ci/deps/circle-310-arm64.yaml
PYTEST_WORKERS: auto
PATTERN: "not single_cpu and not slow and not network and not clipboard and not arm_slow and not db"
PYTEST_TARGET: "pandas"
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 31e2095624347..b0b176feca2bb 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -31,14 +31,14 @@ jobs:
pattern: [""]
include:
- name: "Downstream Compat"
- env_file: actions-38-downstream_compat.yaml
+ env_file: actions-311-downstream_compat.yaml
pattern: "not slow and not network and not single_cpu"
pytest_target: "pandas/tests/test_downstream.py"
- name: "Minimum Versions"
env_file: actions-38-minimum_versions.yaml
pattern: "not slow and not network and not single_cpu"
- name: "Locale: it_IT"
- env_file: actions-38.yaml
+ env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
extra_apt: "language-pack-it"
          # Use the utf8 version as the default; it has no bad side effects.
@@ -48,7 +48,7 @@ jobs:
# It will be temporarily activated during tests with locale.setlocale
extra_loc: "it_IT"
- name: "Locale: zh_CN"
- env_file: actions-38.yaml
+ env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
extra_apt: "language-pack-zh-hans"
          # Use the utf8 version as the default; it has no bad side effects.
@@ -58,7 +58,7 @@ jobs:
# It will be temporarily activated during tests with locale.setlocale
extra_loc: "zh_CN"
- name: "Copy-on-Write"
- env_file: actions-310.yaml
+ env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Pypy"
@@ -66,7 +66,7 @@ jobs:
pattern: "not slow and not network and not single_cpu"
test_args: "--max-worker-restart 0"
- name: "Numpy Dev"
- env_file: actions-310-numpydev.yaml
+ env_file: actions-311-numpydev.yaml
pattern: "not slow and not network and not single_cpu"
test_args: "-W error::DeprecationWarning -W error::FutureWarning"
          # TODO(cython3): Re-enable once the next beta (after beta 1) comes out
@@ -230,7 +230,7 @@ jobs:
/opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir --no-deps -U pip wheel setuptools
- python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
+ python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python setup.py build_ext -q -j$(nproc)
python -m pip install --no-cache-dir --no-build-isolation --no-use-pep517 -e .
python -m pip list
@@ -299,7 +299,7 @@ jobs:
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
python -m pip install git+https://github.com/nedbat/coveragepy.git
python -m pip install versioneer[toml]
- python -m pip install python-dateutil pytz cython hypothesis>=6.46.1 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
+ python -m pip install python-dateutil pytz cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
python -m pip list
- name: Build Pandas
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index c9e44fae43669..ae3a9cde9999a 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -74,7 +74,7 @@ jobs:
fetch-depth: 0
- name: Build wheels
- uses: pypa/cibuildwheel@v2.12.3
+ uses: pypa/cibuildwheel@v2.13.0
env:
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
@@ -173,7 +173,7 @@ jobs:
# (1. Generate sdist 2. Build wheels from sdist)
# This tests the sdists, and saves some build time
python -m pip install dist/*.gz
- pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
+ pip install hypothesis>=6.34.2 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
cd .. # Not a good idea to test within the src tree
python -c "import pandas; print(pandas.__version__);
pandas.test(extra_args=['-m not clipboard and not single_cpu', '--skip-slow', '--skip-network', '--skip-db', '-n=2']);
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index eaa51730477cc..b311a861ff53f 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -317,6 +317,38 @@ def time_i8merge(self, how):
merge(self.left, self.right, how=how)
+class MergeDatetime:
+ params = [
+ [
+ ("ns", "ns"),
+ ("ms", "ms"),
+ ("ns", "ms"),
+ ],
+ [None, "Europe/Brussels"],
+ ]
+ param_names = ["units", "tz"]
+
+ def setup(self, units, tz):
+ unit_left, unit_right = units
+ N = 10_000
+ keys = Series(date_range("2012-01-01", freq="T", periods=N, tz=tz))
+ self.left = DataFrame(
+ {
+ "key": keys.sample(N * 10, replace=True).dt.as_unit(unit_left),
+ "value1": np.random.randn(N * 10),
+ }
+ )
+ self.right = DataFrame(
+ {
+ "key": keys[:8000].dt.as_unit(unit_right),
+ "value2": np.random.randn(8000),
+ }
+ )
+
+ def time_merge(self, units, tz):
+ merge(self.left, self.right)
+
+
class MergeCategoricals:
def setup(self):
self.left_object = DataFrame(
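A minimal sketch of the case the new `MergeDatetime` benchmark times, assuming pandas 2.x with `Series.dt.as_unit` available: merging on datetime keys whose resolutions differ, the path that the `_factorize_keys` change in `pandas/core/reshape/merge.py` further below speeds up.

```python
import numpy as np
import pandas as pd

keys = pd.Series(pd.date_range("2012-01-01", freq="T", periods=5))
left = pd.DataFrame({"key": keys.dt.as_unit("ns"), "value1": np.random.randn(5)})
right = pd.DataFrame({"key": keys.dt.as_unit("ms"), "value2": np.random.randn(5)})

# "ns" and "ms" keys are brought to a common resolution before factorizing,
# so all five rows match without falling back to a slow path
print(pd.merge(left, right, on="key"))
```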
diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml
index 5b9919d8e4c1f..f098d26770733 100644
--- a/ci/deps/actions-310.yaml
+++ b/ci/deps/actions-310.yaml
@@ -9,7 +9,7 @@ dependencies:
- cython>=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17
diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml
similarity index 96%
rename from ci/deps/actions-38-downstream_compat.yaml
rename to ci/deps/actions-311-downstream_compat.yaml
index 3c498663c04df..2fe12ee9d5d79 100644
--- a/ci/deps/actions-38-downstream_compat.yaml
+++ b/ci/deps/actions-311-downstream_compat.yaml
@@ -3,14 +3,14 @@ name: pandas-dev
channels:
- conda-forge
dependencies:
- - python=3.8
+ - python=3.11
# build dependencies
- versioneer[toml]
- cython>=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17
diff --git a/ci/deps/actions-310-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml
similarity index 74%
rename from ci/deps/actions-310-numpydev.yaml
rename to ci/deps/actions-311-numpydev.yaml
index ce9d656adc16a..1c21c86af713b 100644
--- a/ci/deps/actions-310-numpydev.yaml
+++ b/ci/deps/actions-311-numpydev.yaml
@@ -2,13 +2,13 @@ name: pandas-dev
channels:
- conda-forge
dependencies:
- - python=3.10
+ - python=3.11
# build dependencies
- versioneer[toml]
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- hypothesis>=6.34.2
@@ -21,7 +21,7 @@ dependencies:
- pip:
- "cython"
- - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
+ - "--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
- "--pre"
- "numpy"
- "scipy"
diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml
index 77e4fc9d2c2d9..3903f0ca9f32b 100644
--- a/ci/deps/actions-311-pyarrownightly.yaml
+++ b/ci/deps/actions-311-pyarrownightly.yaml
@@ -9,7 +9,7 @@ dependencies:
- cython>=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- hypothesis>=6.34.2
diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml
index 194b59b5abcf2..2b73b90477d8b 100644
--- a/ci/deps/actions-38-minimum_versions.yaml
+++ b/ci/deps/actions-38-minimum_versions.yaml
@@ -11,7 +11,7 @@ dependencies:
- cython>=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17
diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml
index 2a968f059952e..0f8accc833a47 100644
--- a/ci/deps/actions-38.yaml
+++ b/ci/deps/actions-38.yaml
@@ -9,7 +9,7 @@ dependencies:
- cython>=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17
diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
index a1fba778bfc70..979d922a0d086 100644
--- a/ci/deps/actions-39.yaml
+++ b/ci/deps/actions-39.yaml
@@ -9,7 +9,7 @@ dependencies:
- cython>=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17
diff --git a/ci/deps/actions-pypy-38.yaml b/ci/deps/actions-pypy-38.yaml
index 78ab32d6e7830..1c5969bb97799 100644
--- a/ci/deps/actions-pypy-38.yaml
+++ b/ci/deps/actions-pypy-38.yaml
@@ -12,7 +12,7 @@ dependencies:
- cython>=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-asyncio
- pytest-xdist>=2.2.0
diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-310-arm64.yaml
similarity index 93%
rename from ci/deps/circle-38-arm64.yaml
rename to ci/deps/circle-310-arm64.yaml
index 8f309b0781457..9ac10290c80cf 100644
--- a/ci/deps/circle-38-arm64.yaml
+++ b/ci/deps/circle-310-arm64.yaml
@@ -2,14 +2,14 @@ name: pandas-dev
channels:
- conda-forge
dependencies:
- - python=3.8
+ - python=3.10
# build dependencies
- versioneer[toml]
- cython>=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17
@@ -38,7 +38,7 @@ dependencies:
- numexpr
- openpyxl<3.1.1
- odfpy
- - pandas-gbq
+ - pandas-gbq>=0.15.0
- psycopg2
- pyarrow>=7.0.0
- pymysql
diff --git a/ci/meta.yaml b/ci/meta.yaml
index f02c7eec001fc..09ae0d7253bf7 100644
--- a/ci/meta.yaml
+++ b/ci/meta.yaml
@@ -62,7 +62,7 @@ test:
- python -c "import pandas; pandas.test(extra_args={{ extra_args }})" # [python_impl == "cpython"]
requires:
- pip
- - pytest >=7.0.0
+ - pytest >=7.3.2
- pytest-asyncio >=0.17.0
- pytest-xdist >=2.2.0
- pytest-cov
diff --git a/ci/test_wheels_windows.bat b/ci/test_wheels_windows.bat
index ba6cfef8f2dfb..ad6da74c8faed 100644
--- a/ci/test_wheels_windows.bat
+++ b/ci/test_wheels_windows.bat
@@ -4,6 +4,6 @@ pd.test(extra_args=['-m not clipboard and single_cpu', '--skip-slow', '--skip-ne
python --version
pip install pytz six numpy python-dateutil tzdata>=2022.1
-pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
+pip install hypothesis>=6.34.2 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
pip install --find-links=pandas/dist --no-index pandas
python -c "%test_command%"
diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst
index 994dfde0894f3..f7665dbed6499 100644
--- a/doc/source/development/maintaining.rst
+++ b/doc/source/development/maintaining.rst
@@ -368,11 +368,14 @@ Prerequisites
In order to be able to release a new pandas version, the following permissions are needed:
-- Merge rights to the `pandas <https://github.com/pandas-dev/pandas>`_,
-  `pandas-wheels <https://github.com/MacPython/pandas-wheels>`_, and
+- Merge rights to the `pandas <https://github.com/pandas-dev/pandas>`_ and
   `pandas-feedstock <https://github.com/conda-forge/pandas-feedstock>`_ repositories.
-- Permissions to push to main in the pandas repository, to push the new tags.
-- `Write permissions to PyPI `_
+ For the latter, open a PR adding your GitHub username to the conda-forge recipe.
+- Permissions to push to ``main`` in the pandas repository, to push the new tags.
+- `Write permissions to PyPI `_.
+- Access to our website / documentation server. Share your public key with the
+ infrastructure committee to be added to the ``authorized_keys`` file of the main
+ server user.
- Access to the social media accounts, to publish the announcements.
Pre-release
@@ -438,10 +441,10 @@ which will be triggered when the tag is pushed.
4. Create a `new GitHub release <https://github.com/pandas-dev/pandas/releases/new>`_:
-   - Title: ``Pandas <version>``
    - Tag: ``<version>``
-   - Files: ``pandas-<version>.tar.gz`` source distribution just generated
+   - Title: ``Pandas <version>``
    - Description: Copy the description of the last release of the same kind (release candidate, major/minor or patch release)
+   - Files: ``pandas-<version>.tar.gz`` source distribution just generated
- Set as a pre-release: Only check for a release candidate
- Set as the latest release: Leave checked, unless releasing a patch release for an older version
(e.g. releasing 1.4.5 after 1.5 has been released)
@@ -449,6 +452,9 @@ which will be triggered when the tag is pushed.
5. After some hours, the GitHub release will trigger an
   `automated conda-forge PR <https://github.com/conda-forge/pandas-feedstock/pulls>`_.
   Merge it once the CI is green, and it will generate the conda-forge packages.
+   If a manual PR is needed, the version, sha256 and build fields are the ones that
+   usually need to be changed. If anything else in the recipe has changed since the
+   last release, those changes should be available in ``ci/meta.yaml``.
6. Packages for supported versions in PyPI are built in the
   `MacPython repo <https://github.com/MacPython/pandas-wheels>`_.
@@ -475,8 +481,16 @@ which will be triggered when the tag is pushed.
Post-Release
````````````
-1. Update symlink to stable documentation by logging in to our web server, and
-   editing ``/var/www/html/pandas-docs/stable`` to point to ``version/<latest>``.
+1. Update symlinks to stable documentation by logging in to our web server, and
+   editing ``/var/www/html/pandas-docs/stable`` to point to ``version/<latest>``
+   for major and minor releases, or ``version/<minor>`` to ``version/<patch>`` for
+   patch releases. The exact instructions are (replace the example version numbers
+   with the appropriate ones for the version you are releasing):
+
+ - Log in to the server and use the correct user.
+ - `cd /var/www/html/pandas-docs/`
+ - `ln -sfn version/2.1 stable` (for a major or minor release)
+ - `ln -sfn version/2.0.3 version/2.0` (for a patch release)
2. If releasing a major or minor release, open a PR in our source code to update
``web/pandas/versions.json``, to have the desired versions in the documentation
@@ -488,13 +502,16 @@ Post-Release
5. Open a PR with the placeholder for the release notes of the next version. See
for example `the PR for 1.5.3 `_.
+ Note that the template to use depends on whether it is a major, minor or patch release.
6. Announce the new release in the official channels (use previous announcements
for reference):
- The pandas-dev and pydata mailing lists
- - Twitter, Mastodon and Telegram
+ - Twitter, Mastodon, Telegram and LinkedIn
+7. Update these release instructions to fix anything incorrect and to reflect any
+   changes since the last release.
.. _governance documents: https://github.com/pandas-dev/pandas-governance
.. _list of permissions: https://docs.github.com/en/organizations/managing-access-to-your-organizations-repositories/repository-roles-for-an-organization
diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
index 5c2f2410e688c..82589444e6803 100644
--- a/doc/source/user_guide/basics.rst
+++ b/doc/source/user_guide/basics.rst
@@ -2144,7 +2144,7 @@ different numeric dtypes will **NOT** be combined. The following example will gi
{
"A": pd.Series(np.random.randn(8), dtype="float16"),
"B": pd.Series(np.random.randn(8)),
- "C": pd.Series(np.array(np.random.randn(8), dtype="uint8")),
+ "C": pd.Series(np.random.randint(0, 255, size=8), dtype="uint8"),
}
)
df2
diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst
index f2d2a54e4d1fe..3905c320be023 100644
--- a/doc/source/whatsnew/index.rst
+++ b/doc/source/whatsnew/index.rst
@@ -16,6 +16,7 @@ Version 2.0
.. toctree::
:maxdepth: 2
+ v2.0.3
v2.0.2
v2.0.1
v2.0.0
diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst
index bd47e6e4bc025..4806f88924baa 100644
--- a/doc/source/whatsnew/v0.10.0.rst
+++ b/doc/source/whatsnew/v0.10.0.rst
@@ -181,7 +181,7 @@ labeled the aggregated group with the end of the interval: the next day).
``X0``, ``X1``, ...) can be reproduced by specifying ``prefix='X'``:
.. ipython:: python
- :okwarning:
+ :okexcept:
import io
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index e0fc6b90708dc..ca208be69a9a5 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1411,4 +1411,4 @@ Other
Contributors
~~~~~~~~~~~~
-.. contributors:: v1.5.0rc0..v2.0.0|HEAD
+.. contributors:: v1.5.0rc0..v2.0.0
diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst
index 2613d12e43400..f027ec9107816 100644
--- a/doc/source/whatsnew/v2.0.1.rst
+++ b/doc/source/whatsnew/v2.0.1.rst
@@ -65,4 +65,4 @@ Other
Contributors
~~~~~~~~~~~~
-.. contributors:: v2.0.0..v2.0.1|HEAD
+.. contributors:: v2.0.0..v2.0.1
diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst
index 81272e5ddb5bc..9625a4193fe56 100644
--- a/doc/source/whatsnew/v2.0.2.rst
+++ b/doc/source/whatsnew/v2.0.2.rst
@@ -53,4 +53,4 @@ Other
Contributors
~~~~~~~~~~~~
-.. contributors:: v2.0.1..v2.0.2|HEAD
+.. contributors:: v2.0.1..v2.0.2
diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst
new file mode 100644
index 0000000000000..3e2dc1b92c779
--- /dev/null
+++ b/doc/source/whatsnew/v2.0.3.rst
@@ -0,0 +1,46 @@
+.. _whatsnew_203:
+
+What's new in 2.0.3 (June 28, 2023)
+-----------------------------------
+
+These are the changes in pandas 2.0.3. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_203.regressions:
+
+Fixed regressions
+~~~~~~~~~~~~~~~~~
+- Bug in :meth:`Timestamp.weekday` returning incorrect results for dates before ``'0000-02-29'`` (:issue:`53738`)
+- Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
+- Fixed regression in :meth:`DataFrame.to_string` creating extra space for string dtypes (:issue:`52690`)
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_203.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+- Bug in :meth:`DataFrame.convert_dtypes` and :meth:`Series.convert_dtypes` when trying to convert :class:`ArrowDtype` with ``dtype_backend="numpy_nullable"`` (:issue:`53648`)
+- Bug in :meth:`RangeIndex.union` when using ``sort=True`` with another :class:`RangeIndex` (:issue:`53490`)
+- Bug in :meth:`Series.reindex` where expanding a non-nanosecond datetime or timedelta :class:`Series` would not fill with ``NaT`` correctly (:issue:`53497`)
+- Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`)
+- Bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` with ``expand=True`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`53532`)
+- Bug in indexing methods (e.g. :meth:`DataFrame.__getitem__`) where taking the entire :class:`DataFrame`/:class:`Series` would raise an ``OverflowError`` when Copy on Write was enabled and the length of the array was over the maximum size a 32-bit integer can hold (:issue:`53616`)
+- Bug when constructing a :class:`DataFrame` with columns of an :class:`ArrowDtype` with a ``pyarrow.dictionary`` type when the data needs to be reindexed (:issue:`53617`)
+- Bug where indexing a :class:`DataFrame` or :class:`Series` with an :class:`Index` of timestamp :class:`ArrowDtype` would raise an ``AttributeError`` (:issue:`53644`)
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_203.other:
+
+Other
+~~~~~
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_203.contributors:
+
+Contributors
+~~~~~~~~~~~~
+
+.. contributors:: v2.0.2..v2.0.3|HEAD
diff --git a/environment.yml b/environment.yml
index 3ecc9763e5953..d312c6b161164 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,7 +11,7 @@ dependencies:
- cython=0.29.33
# test dependencies
- - pytest>=7.0.0
+ - pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-asyncio>=0.17
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 37eede59e257d..a918cda157eb6 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -651,7 +651,7 @@ ctypedef fused int6432_t:
@cython.wraparound(False)
@cython.boundscheck(False)
-def is_range_indexer(ndarray[int6432_t, ndim=1] left, int n) -> bool:
+def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
"""
Perform an element by element comparison on 1-d integer arrays, meant for indexer
comparisons
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 2839730ca46bd..0bd0597f32ad0 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -117,6 +117,8 @@ from pandas.core.dtypes.common import (
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.inference import is_dict_like
+from pandas.core.arrays.boolean import BooleanDtype
+
cdef:
float64_t INF = np.inf
float64_t NEGINF = -INF
@@ -1167,7 +1169,9 @@ cdef class TextReader:
array_type = dtype.construct_array_type()
try:
# use _from_sequence_of_strings if the class defines it
- if is_bool_dtype(dtype):
+ if isinstance(dtype, BooleanDtype):
+ # xref GH 47534: BooleanArray._from_sequence_of_strings has extra
+ # kwargs
true_values = [x.decode() for x in self.true_values]
false_values = [x.decode() for x in self.false_values]
result = array_type._from_sequence_of_strings(
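For context on the narrowed check: `ArrowDtype` booleans also pass `is_bool_dtype`, but only `BooleanArray._from_sequence_of_strings` accepts the extra `true_values`/`false_values` keywords (GH 47534). A rough illustration of the path that now works, assuming pyarrow is installed:

```python
import io
import pandas as pd

csv = io.StringIO("flag\nTrue\nFalse\n")
# "bool[pyarrow]" no longer takes the BooleanDtype-only keyword path,
# so parsing succeeds with both the "c" and "python" engines
df = pd.read_csv(csv, dtype={"flag": "bool[pyarrow]"}, engine="c")
print(df["flag"].dtype)  # bool[pyarrow]
```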
diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx
index 19c732e2a313b..8b5b079649c2e 100644
--- a/pandas/_libs/tslibs/ccalendar.pyx
+++ b/pandas/_libs/tslibs/ccalendar.pyx
@@ -2,7 +2,6 @@
"""
Cython implementations of functions resembling the stdlib calendar module
"""
-
cimport cython
from numpy cimport (
int32_t,
@@ -19,7 +18,7 @@ cdef int32_t* days_per_month_array = [
31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
-cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]
+cdef int* em = [0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
# The first 13 entries give the month days elapsed as of the first of month N
# (or the total number of days in the year for N=13) in non-leap years.
@@ -76,11 +75,22 @@ cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil:
@cython.wraparound(False)
@cython.boundscheck(False)
-@cython.cdivision
-cdef int dayofweek(int y, int m, int d) nogil:
+@cython.cdivision(True)
+cdef long quot(long a, long b) noexcept nogil:
+ cdef long x
+ x = a/b
+ if (a < 0):
+ x -= (a % b != 0)
+ return x
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+@cython.cdivision(True)
+cdef int dayofweek(int y, int m, int d) noexcept nogil:
"""
Find the day of week for the date described by the Y/M/D triple y, m, d
- using Sakamoto's method, from wikipedia.
+ using Gauss' method, from wikipedia.
0 represents Monday. See [1]_.
@@ -103,16 +113,27 @@ cdef int dayofweek(int y, int m, int d) nogil:
[1] https://docs.python.org/3/library/calendar.html#calendar.weekday
[2] https://en.wikipedia.org/wiki/\
- Determination_of_the_day_of_the_week#Sakamoto.27s_methods
+ Determination_of_the_day_of_the_week#Gauss's_algorithm
"""
+ # Note: this particular implementation comes from
+ # http://berndt-schwerdtfeger.de/wp-content/uploads/pdf/cal.pdf
cdef:
- int day
-
- y -= m < 3
- day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7
- # convert to python day
- return (day + 6) % 7
-
+ long c
+ int g
+ int f
+ int e
+
+ if (m < 3):
+ y -= 1
+
+ c = quot(y, 100)
+ g = y - c * 100
+ f = 5 * (c - quot(c, 4) * 4)
+ e = em[m]
+
+ if (m > 2):
+ e -= 1
+ return (-1 + d + e + f + g + g/4) % 7
cdef bint is_leapyear(int64_t year) nogil:
"""
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index 01ac462eeb659..d52b35953b507 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -33,7 +33,7 @@
"pymysql": "1.0.2",
"pyarrow": "7.0.0",
"pyreadstat": "1.1.2",
- "pytest": "7.0.0",
+ "pytest": "7.3.2",
"pyxlsb": "1.0.8",
"s3fs": "2021.08.0",
"scipy": "1.7.1",
diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py
index d747f20b8499f..0d86a63f6dbda 100644
--- a/pandas/compat/numpy/__init__.py
+++ b/pandas/compat/numpy/__init__.py
@@ -11,6 +11,7 @@
np_version_gte1p22 = _nlv >= Version("1.22")
np_version_gte1p24 = _nlv >= Version("1.24")
np_version_gte1p24p3 = _nlv >= Version("1.24.3")
+np_version_gte1p25 = _nlv >= Version("1.25")
is_numpy_dev = _nlv.dev is not None
_min_numpy_ver = "1.20.3"
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 2dbd9465be3c6..75dd4cfb23a5d 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -96,6 +96,8 @@
notna,
)
+from pandas.io._util import _arrow_dtype_mapping
+
if TYPE_CHECKING:
from pandas import Index
from pandas.core.arrays import (
@@ -583,9 +585,16 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
If fill_value is a non-scalar and dtype is not object.
"""
orig = fill_value
+ orig_is_nat = False
if checknull(fill_value):
# https://github.com/pandas-dev/pandas/pull/39692#issuecomment-1441051740
# avoid cache misses with NaN/NaT values that are not singletons
+ if fill_value is not NA:
+ try:
+ orig_is_nat = np.isnat(fill_value)
+ except TypeError:
+ pass
+
fill_value = _canonical_nans.get(type(fill_value), fill_value)
# for performance, we are using a cached version of the actual implementation
@@ -601,8 +610,10 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
# if fill_value is not hashable (required for caching)
dtype, fill_value = _maybe_promote(dtype, fill_value)
- if dtype == _dtype_obj and orig is not None:
- # GH#51592 restore our potentially non-canonical fill_value
+ if (dtype == _dtype_obj and orig is not None) or (
+ orig_is_nat and np.datetime_data(orig)[0] != "ns"
+ ):
+ # GH#51592,53497 restore our potentially non-canonical fill_value
fill_value = orig
return dtype, fill_value
@@ -1037,6 +1048,8 @@ def convert_dtypes(
"""
inferred_dtype: str | DtypeObj
+ from pandas.core.arrays.arrow.dtype import ArrowDtype
+
if (
convert_string or convert_integer or convert_boolean or convert_floating
) and isinstance(input_array, np.ndarray):
@@ -1119,7 +1132,6 @@ def convert_dtypes(
if dtype_backend == "pyarrow":
from pandas.core.arrays.arrow.array import to_pyarrow_type
- from pandas.core.arrays.arrow.dtype import ArrowDtype
from pandas.core.arrays.string_ import StringDtype
assert not isinstance(inferred_dtype, str)
@@ -1147,6 +1159,9 @@ def convert_dtypes(
pa_type = to_pyarrow_type(base_dtype)
if pa_type is not None:
inferred_dtype = ArrowDtype(pa_type)
+ elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype):
+ # GH 53648
+ inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype]
# error: Incompatible return value type (got "Union[str, Union[dtype[Any],
# ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")
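The new ``numpy_nullable`` branch is what the added `test_convert_dtypes_pyarrow_to_np_nullable` checks; a minimal round trip, assuming pyarrow is installed:

```python
import pandas as pd

df = pd.DataFrame(range(2), dtype="int32[pyarrow]")
# the ArrowDtype is now mapped to its masked NumPy-backed equivalent
# instead of being returned unchanged
print(df.convert_dtypes(dtype_backend="numpy_nullable").dtypes)  # Int32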
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index e2b8400188136..6df553fd57ebd 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3201,7 +3201,7 @@ def union(self, other, sort=None):
return self._wrap_setop_result(other, result)
- def _union(self, other: Index, sort):
+ def _union(self, other: Index, sort: bool | None):
"""
Specific union logic should go here. In subclasses, union behavior
should be overwritten here rather than in `self.union`.
@@ -3212,6 +3212,7 @@ def _union(self, other: Index, sort):
sort : False or None, default False
Whether to sort the resulting index.
+ * True : sort the result
* False : do not sort the result.
* None : sort the result, except when `self` and `other` are equal
or when the values cannot be compared.
@@ -3224,7 +3225,7 @@ def _union(self, other: Index, sort):
rvals = other._values
if (
- sort is None
+ sort in (None, True)
and self.is_monotonic_increasing
and other.is_monotonic_increasing
and not (self.has_duplicates and other.has_duplicates)
@@ -5879,7 +5880,9 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]
if isinstance(key, Index):
# GH 42790 - Preserve name from an Index
keyarr.name = key.name
- if keyarr.dtype.kind in ["m", "M"]:
+ if (
+ isinstance(keyarr.dtype, np.dtype) and keyarr.dtype.kind in ["m", "M"]
+ ) or isinstance(keyarr.dtype, DatetimeTZDtype):
# DTI/TDI.take can infer a freq in some cases when we dont want one
if isinstance(key, list) or (
isinstance(key, type(self))
@@ -7209,11 +7212,19 @@ def _unpack_nested_dtype(other: Index) -> Index:
-------
Index
"""
+ from pandas.core.arrays.arrow import ArrowDtype
+
dtype = other.dtype
if isinstance(dtype, CategoricalDtype):
# If there is ever a SparseIndex, this could get dispatched
# here too.
return dtype.categories
+ elif isinstance(dtype, ArrowDtype):
+ # GH 53617
+ import pyarrow as pa
+
+ if pa.types.is_dictionary(dtype.pyarrow_dtype):
+ other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
return other
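This matches the new `test_get_indexer_arrow_dictionary_target` further below; a short sketch, assuming pyarrow is installed:

```python
import pandas as pd
import pyarrow as pa

target = pd.Index(
    pd.arrays.ArrowExtensionArray(
        pa.array([1, 2], type=pa.dictionary(pa.int8(), pa.int8()))
    )
)
# the dictionary dtype is unwrapped to its value type (int8) before
# matching, so this returns positions instead of raising
print(pd.Index([1]).get_indexer(target))  # [ 0 -1]
```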
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index 88ac3928b4b53..03de8a1f32079 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -607,7 +607,7 @@ def _range_in_self(self, other: range) -> bool:
return False
return other.start in self._range and other[-1] in self._range
- def _union(self, other: Index, sort):
+ def _union(self, other: Index, sort: bool | None):
"""
Form the union of two Index objects and sorts if possible
@@ -615,9 +615,9 @@ def _union(self, other: Index, sort):
----------
other : Index or array-like
- sort : False or None, default None
+ sort : bool or None, default None
Whether to sort (monotonically increasing) the resulting index.
- ``sort=None`` returns a ``RangeIndex`` if possible or a sorted
+    ``sort=None`` or ``sort=True`` returns a ``RangeIndex`` if possible or a sorted
     ``Index`` with an int64 dtype if not.
``sort=False`` can return a ``RangeIndex`` if self is monotonically
increasing and other is fully contained in self. Otherwise, returns
@@ -628,7 +628,7 @@ def _union(self, other: Index, sort):
union : Index
"""
if isinstance(other, RangeIndex):
- if sort is None or (
+ if sort in (None, True) or (
sort is False and self.step > 0 and self._range_in_self(other._range)
):
# GH 47557: Can still return a RangeIndex
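The user-visible effect, as covered by the new `test_union_rangeindex_sort_true`: with `sort=True`, the union of two `RangeIndex` objects could previously come back unsorted (GH 53490); it now takes the same path as `sort=None`. A sketch:

```python
import pandas as pd

idx1 = pd.RangeIndex(1, 100, 6)
idx2 = pd.RangeIndex(1, 50, 3)
# sorted union of both ranges: 1, 4, 7, 10, ...
print(idx1.union(idx2, sort=True)[:5])
```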
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 4c9cfb476586e..b1fb6773648e8 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -2370,7 +2370,14 @@ def _factorize_keys(
rk = extract_array(rk, extract_numpy=True, extract_range=True)
# TODO: if either is a RangeIndex, we can likely factorize more efficiently?
- if isinstance(lk.dtype, DatetimeTZDtype) and isinstance(rk.dtype, DatetimeTZDtype):
+ if (
+ isinstance(lk.dtype, DatetimeTZDtype) and isinstance(rk.dtype, DatetimeTZDtype)
+ ) or (
+ isinstance(lk.dtype, np.dtype)
+ and lk.dtype.kind == "M"
+ and isinstance(rk.dtype, np.dtype)
+ and rk.dtype.kind == "M"
+ ):
# Extract the ndarray (UTC-localized) values
# Note: we dont need the dtypes to match, as these can still be compared
lk, rk = cast("DatetimeArray", lk)._ensure_matching_resos(rk)
@@ -2403,6 +2410,13 @@ def _factorize_keys(
# "_values_for_factorize"
rk, _ = rk._values_for_factorize() # type: ignore[union-attr]
+ if needs_i8_conversion(lk.dtype) and lk.dtype == rk.dtype:
+ # GH#23917 TODO: Needs tests for non-matching dtypes
+ # GH#23917 TODO: needs tests for case where lk is integer-dtype
+ # and rk is datetime-dtype
+ lk = np.asarray(lk, dtype=np.int64)
+ rk = np.asarray(rk, dtype=np.int64)
+
klass, lk, rk = _convert_arrays_and_get_rizer_klass(lk, rk)
rizer = klass(max(len(lk), len(rk)))
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 699a32fe0c028..e544bde16da9c 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -273,14 +273,40 @@ def _wrap_result(
if isinstance(result.dtype, ArrowDtype):
import pyarrow as pa
+ from pandas.compat import pa_version_under11p0
+
from pandas.core.arrays.arrow.array import ArrowExtensionArray
- max_len = pa.compute.max(
- result._data.combine_chunks().value_lengths()
- ).as_py()
- if result.isna().any():
+ value_lengths = result._data.combine_chunks().value_lengths()
+ max_len = pa.compute.max(value_lengths).as_py()
+ min_len = pa.compute.min(value_lengths).as_py()
+ if result._hasna:
# ArrowExtensionArray.fillna doesn't work for list scalars
- result._data = result._data.fill_null([None] * max_len)
+ result = ArrowExtensionArray(
+ result._data.fill_null([None] * max_len)
+ )
+ if min_len < max_len:
+ # append nulls to each scalar list element up to max_len
+ if not pa_version_under11p0:
+ result = ArrowExtensionArray(
+ pa.compute.list_slice(
+ result._data,
+ start=0,
+ stop=max_len,
+ return_fixed_size_list=True,
+ )
+ )
+ else:
+ all_null = np.full(max_len, fill_value=None, dtype=object)
+ values = result.to_numpy()
+ new_values = []
+ for row in values:
+ if len(row) < max_len:
+ nulls = all_null[: max_len - len(row)]
+ row = np.append(row, nulls)
+ new_values.append(row)
+ pa_type = result._data.type
+ result = ArrowExtensionArray(pa.array(new_values, type=pa_type))
if name is not None:
labels = name
else:
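The padding logic corresponds to the new `str.split`/`str.rsplit` cases added to `pandas/tests/extension/test_arrow.py`; a small sketch, assuming pyarrow is installed:

```python
import pandas as pd
import pyarrow as pa

ser = pd.Series(["a1cbcb", "a2cbcb", None], dtype=pd.ArrowDtype(pa.string()))
# rows that split into fewer pieces than the widest row are padded with
# nulls (via pa.compute.list_slice on pyarrow>=11, else a NumPy fallback)
print(ser.str.split("1", expand=True))
```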
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index d3637d177b7f5..a0e4d003e45bd 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1136,7 +1136,7 @@ def _validate(self):
if not isinstance(self.win_type, str):
raise ValueError(f"Invalid win_type {self.win_type}")
signal = import_optional_dependency(
- "scipy.signal", extra="Scipy is required to generate window weight."
+ "scipy.signal.windows", extra="Scipy is required to generate window weight."
)
self._scipy_weight_generator = getattr(signal, self.win_type, None)
if self._scipy_weight_generator is None:
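The lookup moves to `scipy.signal.windows`, where the window functions live in modern SciPy. User-facing behavior is unchanged; a sketch, assuming SciPy is installed:

```python
import pandas as pd

s = pd.Series(range(10), dtype="float64")
# win_type names now resolve against scipy.signal.windows
print(s.rolling(5, win_type="gaussian").mean(std=1.0))
```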
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index ccae3887c248e..ee2eaa6c58377 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1426,7 +1426,7 @@ def _format(x):
fmt_values = []
for i, v in enumerate(vals):
- if not is_float_type[i] and leading_space or self.formatter is not None:
+ if (not is_float_type[i] or self.formatter is not None) and leading_space:
fmt_values.append(f" {_format(v)}")
elif is_float_type[i]:
fmt_values.append(float_format(v))
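The reordered condition is what the new `test_no_extra_space` covers: when a formatter is supplied, the extra leading space should only be added if `leading_space` is set, not unconditionally. Sketch:

```python
import pandas as pd

df = pd.DataFrame([{"col1": "TEST", "col2": "PANDAS", "col3": "to_string"}])
fmts = {"col1": "{:<6s}".format, "col2": "{:<7s}".format, "col3": "{:<10s}".format}
# no extra space is prepended on top of the formatter's own padding
print(repr(df.to_string(index=False, header=False, formatters=fmts)))
```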
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 2db759719fcb4..22ab8607e060a 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -80,6 +80,7 @@
FloatingArray,
IntegerArray,
)
+from pandas.core.arrays.boolean import BooleanDtype
from pandas.core.indexes.api import (
Index,
MultiIndex,
@@ -800,7 +801,7 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi
elif isinstance(cast_type, ExtensionDtype):
array_type = cast_type.construct_array_type()
try:
- if is_bool_dtype(cast_type):
+ if isinstance(cast_type, BooleanDtype):
# error: Unexpected keyword argument "true_values" for
# "_from_sequence_of_strings" of "ExtensionArray"
return array_type._from_sequence_of_strings( # type: ignore[call-arg] # noqa:E501
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 754cc94b6ded6..88244018079d2 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -1112,7 +1112,9 @@ def _get_subplots(self):
from matplotlib.axes import Subplot
return [
- ax for ax in self.axes[0].get_figure().get_axes() if isinstance(ax, Subplot)
+ ax
+ for ax in self.fig.get_axes()
+ if (isinstance(ax, Subplot) and ax.get_subplotspec() is not None)
]
def _get_axes_layout(self) -> tuple[int, int]:
diff --git a/pandas/tests/apply/test_frame_apply_relabeling.py b/pandas/tests/apply/test_frame_apply_relabeling.py
index ed3836de47051..41f8ec2576fd4 100644
--- a/pandas/tests/apply/test_frame_apply_relabeling.py
+++ b/pandas/tests/apply/test_frame_apply_relabeling.py
@@ -1,7 +1,7 @@
import numpy as np
import pytest
-from pandas.compat import is_numpy_dev
+from pandas.compat.numpy import np_version_gte1p25
import pandas as pd
import pandas._testing as tm
@@ -45,7 +45,7 @@ def test_agg_relabel_multi_columns_multi_methods():
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(is_numpy_dev, reason="name of min now equals name of np.min")
+@pytest.mark.xfail(np_version_gte1p25, reason="name of min now equals name of np.min")
def test_agg_relabel_partial_functions():
# GH 26513, test on partial, functools or more complex cases
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py
index db6c51d0f6991..c86407c7d6a79 100644
--- a/pandas/tests/dtypes/test_missing.py
+++ b/pandas/tests/dtypes/test_missing.py
@@ -9,7 +9,7 @@
from pandas._libs import missing as libmissing
from pandas._libs.tslibs import iNaT
-from pandas.compat import is_numpy_dev
+from pandas.compat.numpy import np_version_gte1p25
from pandas.core.dtypes.common import (
is_float,
@@ -460,7 +460,7 @@ def test_array_equivalent_series(val):
cm = (
# stacklevel is chosen to make sense when called from .equals
tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False)
- if isinstance(val, str) and not is_numpy_dev
+ if isinstance(val, str) and not np_version_gte1p25
else nullcontext()
)
with cm:
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 8907137c71844..03734626c8f95 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -45,6 +45,7 @@
import pandas as pd
import pandas._testing as tm
+from pandas.api.extensions import no_default
from pandas.api.types import (
is_bool_dtype,
is_float_dtype,
@@ -738,14 +739,11 @@ def test_setitem_preserves_views(self, data):
class TestBaseParsing(base.BaseParsingTests):
+ @pytest.mark.parametrize("dtype_backend", ["pyarrow", no_default])
@pytest.mark.parametrize("engine", ["c", "python"])
- def test_EA_types(self, engine, data, request):
+ def test_EA_types(self, engine, data, dtype_backend, request):
pa_dtype = data.dtype.pyarrow_dtype
- if pa.types.is_boolean(pa_dtype):
- request.node.add_marker(
- pytest.mark.xfail(raises=TypeError, reason="GH 47534")
- )
- elif pa.types.is_decimal(pa_dtype):
+ if pa.types.is_decimal(pa_dtype):
request.node.add_marker(
pytest.mark.xfail(
raises=NotImplementedError,
@@ -763,6 +761,17 @@ def test_EA_types(self, engine, data, request):
request.node.add_marker(
pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")
)
+ elif (
+ pa.types.is_duration(pa_dtype)
+ and dtype_backend == "pyarrow"
+ and engine == "python"
+ ):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ raises=TypeError,
+ reason="Invalid type for timedelta scalar: NAType",
+ )
+ )
df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
csv_output = df.to_csv(index=False, na_rep=np.nan)
if pa.types.is_binary(pa_dtype):
@@ -770,7 +779,10 @@ def test_EA_types(self, engine, data, request):
else:
csv_output = StringIO(csv_output)
result = pd.read_csv(
- csv_output, dtype={"with_dtype": str(data.dtype)}, engine=engine
+ csv_output,
+ dtype={"with_dtype": str(data.dtype)},
+ engine=engine,
+ dtype_backend=dtype_backend,
)
expected = df
self.assert_frame_equal(result, expected)
@@ -2303,6 +2315,15 @@ def test_str_split():
)
tm.assert_frame_equal(result, expected)
+ result = ser.str.split("1", expand=True)
+ expected = pd.DataFrame(
+ {
+ 0: ArrowExtensionArray(pa.array(["a", "a2cbcb", None])),
+ 1: ArrowExtensionArray(pa.array(["cbcb", None, None])),
+ }
+ )
+ tm.assert_frame_equal(result, expected)
+
def test_str_rsplit():
# GH 52401
@@ -2328,6 +2349,15 @@ def test_str_rsplit():
)
tm.assert_frame_equal(result, expected)
+ result = ser.str.rsplit("1", expand=True)
+ expected = pd.DataFrame(
+ {
+ 0: ArrowExtensionArray(pa.array(["a", "a2cbcb", None])),
+ 1: ArrowExtensionArray(pa.array(["cbcb", None, None])),
+ }
+ )
+ tm.assert_frame_equal(result, expected)
+
def test_str_unsupported_extract():
ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index 998ac9c8395ce..bdd54b5cf9ede 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -61,7 +61,7 @@ class mystring(str):
"dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"]
)
def test_setitem_dtype(self, dtype, float_frame):
- arr = np.random.randn(len(float_frame))
+ arr = np.random.randint(1, 10, len(float_frame))
float_frame[dtype] = np.array(arr, dtype=dtype)
assert float_frame[dtype].dtype.name == dtype
diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py
index fe74ec8077bc9..9f9fc4a7bd8cc 100644
--- a/pandas/tests/frame/methods/test_compare.py
+++ b/pandas/tests/frame/methods/test_compare.py
@@ -1,7 +1,7 @@
import numpy as np
import pytest
-from pandas.compat import is_numpy_dev
+from pandas.compat.numpy import np_version_gte1p25
import pandas as pd
import pandas._testing as tm
@@ -265,7 +265,7 @@ def test_compare_ea_and_np_dtype(val1, val2):
("b", "other"): np.nan,
}
)
- if val1 is pd.NA and is_numpy_dev:
+ if val1 is pd.NA and np_version_gte1p25:
# can't compare with numpy array if it contains pd.NA
with pytest.raises(TypeError, match="boolean value of NA is ambiguous"):
result = df1.compare(df2, keep_shape=True)
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index 2adee158379bb..082ef025992dd 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -146,7 +146,7 @@ def test_pyarrow_engine_lines_false(self):
with pytest.raises(ValueError, match=msg):
df.convert_dtypes(dtype_backend="numpy")
- def test_pyarrow_backend_no_convesion(self):
+ def test_pyarrow_backend_no_conversion(self):
# GH#52872
pytest.importorskip("pyarrow")
df = pd.DataFrame({"a": [1, 2], "b": 1.5, "c": True, "d": "x"})
@@ -159,3 +159,11 @@ def test_pyarrow_backend_no_convesion(self):
dtype_backend="pyarrow",
)
tm.assert_frame_equal(result, expected)
+
+ def test_convert_dtypes_pyarrow_to_np_nullable(self):
+ # GH 53648
+ pytest.importorskip("pyarrow")
+        df = pd.DataFrame(range(2), dtype="int32[pyarrow]")
+        result = df.convert_dtypes(dtype_backend="numpy_nullable")
+ expected = pd.DataFrame(range(2), dtype="Int32")
+ tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index b5c33a41dd780..17dea51263222 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -9,6 +9,7 @@
import pandas as pd
import pandas._testing as tm
+from pandas.util.version import Version
@pytest.fixture
@@ -155,7 +156,7 @@ def test_nlargest_n_identical_values(self):
[["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]],
)
@pytest.mark.parametrize("n", range(1, 6))
- def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
+ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request):
# GH#13412
df = df_duplicates
@@ -165,6 +166,18 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
result = df.nlargest(n, order)
expected = df.sort_values(order, ascending=False).head(n)
+ if Version(np.__version__) >= Version("1.25") and (
+            (order == ["a"] and n in (1, 2, 3, 4)) or (order == ["a", "b"] and n == 5)
+ ):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                        "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
tm.assert_frame_equal(result, expected)
def test_nlargest_duplicate_keep_all_ties(self):
diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index e2877acbdd040..4c41632040dbe 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -12,6 +12,7 @@
date_range,
)
import pandas._testing as tm
+from pandas.util.version import Version
class TestDataFrameSortValues:
@@ -849,9 +850,22 @@ def ascending(request):
class TestSortValuesLevelAsStr:
def test_sort_index_level_and_column_label(
- self, df_none, df_idx, sort_names, ascending
+ self, df_none, df_idx, sort_names, ascending, request
):
# GH#14353
+ if (
+ Version(np.__version__) >= Version("1.25")
+ and request.node.callspec.id == "df_idx0-inner-True"
+ ):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                        "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
# Get index levels from df_idx
levels = df_idx.index.names
@@ -867,7 +881,7 @@ def test_sort_index_level_and_column_label(
tm.assert_frame_equal(result, expected)
def test_sort_column_level_and_index_label(
- self, df_none, df_idx, sort_names, ascending
+ self, df_none, df_idx, sort_names, ascending, request
):
# GH#14353
@@ -886,6 +900,17 @@ def test_sort_column_level_and_index_label(
# Compute result by transposing and sorting on axis=1.
result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
+ if Version(np.__version__) >= Version("1.25"):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                        "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
+
tm.assert_frame_equal(result, expected)
def test_sort_values_validate_ascending_for_value_error(self):
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 269e710825899..d4bb45b3ba11a 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2696,6 +2696,24 @@ def test_frame_from_dict_with_mixed_tzaware_indexes(self):
with pytest.raises(TypeError, match=msg):
DataFrame({"D": ser1, "A": ser2, "B": ser3})
+ @pytest.mark.parametrize(
+ "key_val, col_vals, col_type",
+ [
+ ["3", ["3", "4"], "utf8"],
+ [3, [3, 4], "int8"],
+ ],
+ )
+ def test_dict_data_arrow_column_expansion(self, key_val, col_vals, col_type):
+ # GH 53617
+ pa = pytest.importorskip("pyarrow")
+ cols = pd.arrays.ArrowExtensionArray(
+ pa.array(col_vals, type=pa.dictionary(pa.int8(), getattr(pa, col_type)()))
+ )
+ result = DataFrame({key_val: [1, 2]}, columns=cols)
+ expected = DataFrame([[1, np.nan], [2, np.nan]], columns=cols)
+ expected.iloc[:, 1] = expected.iloc[:, 1].astype(object)
+ tm.assert_frame_equal(result, expected)
+
class TestDataFrameConstructorWithDtypeCoercion:
def test_floating_values_integer_dtype(self):
diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py
index a9ec726ab443e..e325599684e5b 100644
--- a/pandas/tests/frame/test_unary.py
+++ b/pandas/tests/frame/test_unary.py
@@ -3,7 +3,7 @@
import numpy as np
import pytest
-from pandas.compat import is_numpy_dev
+from pandas.compat.numpy import np_version_gte1p25
import pandas as pd
import pandas._testing as tm
@@ -130,7 +130,7 @@ def test_pos_object(self, df):
)
def test_pos_object_raises(self, df):
# GH#21380
- if is_numpy_dev:
+ if np_version_gte1p25:
with pytest.raises(
TypeError, match=r"^bad operand type for unary \+: \'str\'$"
):
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 2c3c2277ed627..ce29fba7a7ab0 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -21,6 +21,7 @@
to_datetime,
)
import pandas._testing as tm
+from pandas.util.version import Version
def tests_value_counts_index_names_category_column():
@@ -244,8 +245,18 @@ def test_bad_subset(education_df):
gp.value_counts(subset=["country"])
-def test_basic(education_df):
+def test_basic(education_df, request):
# gh43564
+ if Version(np.__version__) >= Version("1.25"):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                    "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
result = education_df.groupby("country")[["gender", "education"]].value_counts(
normalize=True
)
@@ -283,7 +294,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("frame", [True, False])
def test_against_frame_and_seriesgroupby(
- education_df, groupby, normalize, name, sort, ascending, as_index, frame
+ education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
):
# test all parameters:
# - Use column, array or function as by= parameter
@@ -293,6 +304,16 @@ def test_against_frame_and_seriesgroupby(
# - 3-way compare against:
# - apply with :meth:`~DataFrame.value_counts`
# - `~SeriesGroupBy.value_counts`
+ if Version(np.__version__) >= Version("1.25") and frame and sort and normalize:
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                    "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
by = {
"column": "country",
"array": education_df["country"].values,
@@ -454,8 +475,18 @@ def nulls_df():
],
)
def test_dropna_combinations(
- nulls_df, group_dropna, count_dropna, expected_rows, expected_values
+ nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
):
+ if Version(np.__version__) >= Version("1.25") and not group_dropna:
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                    "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
gp = nulls_df.groupby(["A", "B"], dropna=group_dropna)
result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna)
columns = DataFrame()
@@ -546,10 +577,20 @@ def test_data_frame_value_counts_dropna(
],
)
def test_categorical_single_grouper_with_only_observed_categories(
- education_df, as_index, observed, normalize, name, expected_data
+ education_df, as_index, observed, normalize, name, expected_data, request
):
# Test single categorical grouper with only observed grouping categories
# when non-groupers are also categorical
+ if Version(np.__version__) >= Version("1.25"):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                    "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
gp = education_df.astype("category").groupby(
"country", as_index=as_index, observed=observed
@@ -645,10 +686,21 @@ def assert_categorical_single_grouper(
],
)
def test_categorical_single_grouper_observed_true(
- education_df, as_index, normalize, name, expected_data
+ education_df, as_index, normalize, name, expected_data, request
):
# GH#46357
+ if Version(np.__version__) >= Version("1.25"):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                    "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
+
expected_index = [
("FR", "male", "low"),
("FR", "female", "high"),
@@ -715,10 +767,21 @@ def test_categorical_single_grouper_observed_true(
],
)
def test_categorical_single_grouper_observed_false(
- education_df, as_index, normalize, name, expected_data
+ education_df, as_index, normalize, name, expected_data, request
):
# GH#46357
+ if Version(np.__version__) >= Version("1.25"):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                    "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
+
expected_index = [
("FR", "male", "low"),
("FR", "female", "high"),
@@ -856,10 +919,22 @@ def test_categorical_multiple_groupers(
],
)
def test_categorical_non_groupers(
- education_df, as_index, observed, normalize, name, expected_data
+ education_df, as_index, observed, normalize, name, expected_data, request
):
# GH#46357 Test non-observed categories are included in the result,
# regardless of `observed`
+
+ if Version(np.__version__) >= Version("1.25"):
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=(
+                    "pandas default unstable sorting of duplicates "
+ "issue with numpy>=1.25 with AVX instructions"
+ ),
+ strict=False,
+ )
+ )
+
education_df = education_df.copy()
education_df["gender"] = education_df["gender"].astype("category")
education_df["education"] = education_df["education"].astype("category")
diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py
index c99e912ce4c0f..cd28d519313ed 100644
--- a/pandas/tests/indexes/numeric/test_indexing.py
+++ b/pandas/tests/indexes/numeric/test_indexing.py
@@ -11,7 +11,10 @@
Timestamp,
)
import pandas._testing as tm
-from pandas.core.arrays import FloatingArray
+from pandas.core.arrays import (
+ ArrowExtensionArray,
+ FloatingArray,
+)
@pytest.fixture
@@ -389,6 +392,26 @@ def test_get_indexer_masked_na_boolean(self, dtype):
result = idx.get_loc(NA)
assert result == 2
+ def test_get_indexer_arrow_dictionary_target(self):
+ pa = pytest.importorskip("pyarrow")
+ target = Index(
+ ArrowExtensionArray(
+ pa.array([1, 2], type=pa.dictionary(pa.int8(), pa.int8()))
+ )
+ )
+ idx = Index([1])
+
+ result = idx.get_indexer(target)
+ expected = np.array([0, -1], dtype=np.int64)
+ tm.assert_numpy_array_equal(result, expected)
+
+ result_1, result_2 = idx.get_indexer_non_unique(target)
+ expected_1, expected_2 = np.array([0, -1], dtype=np.int64), np.array(
+ [1], dtype=np.int64
+ )
+ tm.assert_numpy_array_equal(result_1, expected_1)
+ tm.assert_numpy_array_equal(result_2, expected_2)
+
class TestWhere:
@pytest.mark.parametrize(
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
index 1be4a1835de09..1a3ed91d7f903 100644
--- a/pandas/tests/indexes/test_setops.py
+++ b/pandas/tests/indexes/test_setops.py
@@ -578,6 +578,43 @@ def test_union_nan_in_both(dup):
tm.assert_index_equal(result, expected)
+def test_union_rangeindex_sort_true():
+ # GH 53490
+ idx1 = RangeIndex(1, 100, 6)
+ idx2 = RangeIndex(1, 50, 3)
+ result = idx1.union(idx2, sort=True)
+ expected = Index(
+ [
+ 1,
+ 4,
+ 7,
+ 10,
+ 13,
+ 16,
+ 19,
+ 22,
+ 25,
+ 28,
+ 31,
+ 34,
+ 37,
+ 40,
+ 43,
+ 46,
+ 49,
+ 55,
+ 61,
+ 67,
+ 73,
+ 79,
+ 85,
+ 91,
+ 97,
+ ]
+ )
+ tm.assert_index_equal(result, expected)
+
+
def test_union_with_duplicate_index_not_subset_and_non_monotonic(
any_dtype_for_small_pos_integer_indexes,
):
diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py
index 15e1fae77d65b..6510612ba6f87 100644
--- a/pandas/tests/indexing/test_datetime.py
+++ b/pandas/tests/indexing/test_datetime.py
@@ -168,3 +168,21 @@ def test_getitem_str_slice_millisecond_resolution(self, frame_or_series):
],
)
tm.assert_equal(result, expected)
+
+ def test_getitem_pyarrow_index(self, frame_or_series):
+ # GH 53644
+ pytest.importorskip("pyarrow")
+ obj = frame_or_series(
+ range(5),
+ index=date_range("2020", freq="D", periods=5).astype(
+ "timestamp[us][pyarrow]"
+ ),
+ )
+ result = obj.loc[obj.index[:-3]]
+ expected = frame_or_series(
+ range(2),
+ index=date_range("2020", freq="D", periods=2).astype(
+ "timestamp[us][pyarrow]"
+ ),
+ )
+ tm.assert_equal(result, expected)
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index fcb7f6657beac..6d1def2712632 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -2165,6 +2165,17 @@ def test_max_rows_fitted(self, length, min_rows, max_rows, expected):
result = formatter.max_rows_fitted
assert result == expected
+ def test_no_extra_space(self):
+ # GH 52690: Check that no extra space is given
+ col1 = "TEST"
+ col2 = "PANDAS"
+ col3 = "to_string"
+ expected = f"{col1:<6s} {col2:<7s} {col3:<10s}"
+ df = DataFrame([{"col1": "TEST", "col2": "PANDAS", "col3": "to_string"}])
+ d = {"col1": "{:<6s}".format, "col2": "{:<7s}".format, "col3": "{:<10s}".format}
+ result = df.to_string(index=False, header=False, formatters=d)
+ assert result == expected
+
def gen_series_formatting():
s1 = Series(["a"] * 100)
diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py
index 49190daa37442..623d6e664090f 100644
--- a/pandas/tests/io/pytables/test_file_handling.py
+++ b/pandas/tests/io/pytables/test_file_handling.py
@@ -3,7 +3,12 @@
import numpy as np
import pytest
-from pandas.compat import is_platform_little_endian
+from pandas.compat import (
+ PY311,
+ is_ci_environment,
+ is_platform_linux,
+ is_platform_little_endian,
+)
from pandas.errors import (
ClosedFileError,
PossibleDataLossError,
@@ -222,39 +227,44 @@ def test_complibs_default_settings_override(tmp_path, setup_path):
assert node.filters.complib == "blosc"
-def test_complibs(tmp_path, setup_path):
+@pytest.mark.parametrize("lvl", range(10))
+@pytest.mark.parametrize("lib", tables.filters.all_complibs)
+@pytest.mark.filterwarnings("ignore:object name is not a valid")
+@pytest.mark.skipif(
+ not PY311 and is_ci_environment() and is_platform_linux(),
+ reason="Segfaulting in a CI environment"
+ # with xfail, would sometimes raise UnicodeDecodeError
+ # invalid state byte
+)
+def test_complibs(tmp_path, lvl, lib):
# GH14478
- df = tm.makeDataFrame()
+ df = DataFrame(
+ np.ones((30, 4)), columns=list("ABCD"), index=np.arange(30).astype(np.str_)
+ )
- # Building list of all complibs and complevels tuples
- all_complibs = tables.filters.all_complibs
# Skip if lzo is not available on this platform
if not tables.which_lib_version("lzo"):
- all_complibs.remove("lzo")
+ pytest.skip("lzo not available")
# Skip if bzip2 is not available on this platform
if not tables.which_lib_version("bzip2"):
- all_complibs.remove("bzip2")
-
- all_levels = range(0, 10)
- all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]
+ pytest.skip("bzip2 not available")
- for lib, lvl in all_tests:
- tmpfile = tmp_path / setup_path
- gname = "foo"
+ tmpfile = tmp_path / f"{lvl}_{lib}.h5"
+ gname = f"{lvl}_{lib}"
- # Write and read file to see if data is consistent
- df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
- result = read_hdf(tmpfile, gname)
- tm.assert_frame_equal(result, df)
+ # Write and read file to see if data is consistent
+ df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
+ result = read_hdf(tmpfile, gname)
+ tm.assert_frame_equal(result, df)
- # Open file and check metadata for correct amount of compression
- with tables.open_file(tmpfile, mode="r") as h5table:
- for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):
- assert node.filters.complevel == lvl
- if lvl == 0:
- assert node.filters.complib is None
- else:
- assert node.filters.complib == lib
+ # Open file and check metadata for correct amount of compression
+ with tables.open_file(tmpfile, mode="r") as h5table:
+ for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):
+ assert node.filters.complevel == lvl
+ if lvl == 0:
+ assert node.filters.complib is None
+ else:
+ assert node.filters.complib == lib
@pytest.mark.skipif(
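The rewrite turns the hand-rolled loop over (complib, complevel) pairs into pytest parametrization, so each combination gets its own test id and a segfaulting pair no longer takes the rest of the loop down with it. Per pair, the round trip looks like this (a sketch assuming PyTables is installed; demo.h5 is a placeholder path):

import numpy as np
import pandas as pd
import tables

df = pd.DataFrame(np.ones((3, 2)), columns=["a", "b"])
df.to_hdf("demo.h5", "grp", complib="zlib", complevel=5)
assert pd.read_hdf("demo.h5", "grp").equals(df)
with tables.open_file("demo.h5", mode="r") as h5:
    for node in h5.walk_nodes(where="/grp", classname="Leaf"):
        assert node.filters.complevel == 5
        assert node.filters.complib == "zlib"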
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 4a5d5e0c85a09..60af595018a54 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -461,7 +461,7 @@ def test_get_labels_groupby_for_Int64(writable):
def test_tracemalloc_works_for_StringHashTable():
N = 1000
- keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)
+ keys = np.arange(N).astype(np.str_).astype(np.object_)
with activated_tracemalloc():
table = ht.StringHashTable()
table.map_locations(keys)
@@ -484,7 +484,7 @@ def test_tracemalloc_for_empty_StringHashTable():
@pytest.mark.parametrize("N", range(1, 110))
def test_no_reallocation_StringHashTable(N):
- keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)
+ keys = np.arange(N).astype(np.str_).astype(np.object_)
preallocated_table = ht.StringHashTable(N)
n_buckets_start = preallocated_table.get_state()["n_buckets"]
preallocated_table.map_locations(keys)
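The only change in this file is dropping np.compat.unicode, a Python 2 era alias for str that newer NumPy deprecates along with the rest of np.compat. The replacement is a drop-in:

import numpy as np

keys = np.arange(5).astype(np.str_).astype(np.object_)
print(keys)  # array(['0', '1', '2', '3', '4'], dtype=object)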
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
index 383e1b81e17a7..6ad8d748d6997 100644
--- a/pandas/tests/libs/test_lib.py
+++ b/pandas/tests/libs/test_lib.py
@@ -6,6 +6,7 @@
lib,
writers as libwriters,
)
+from pandas.compat import IS64
from pandas import Index
import pandas._testing as tm
@@ -248,6 +249,18 @@ def test_is_range_indexer(self, dtype):
left = np.arange(0, 100, dtype=dtype)
assert lib.is_range_indexer(left, 100)
+ @pytest.mark.skipif(
+ not IS64,
+ reason="2**31 is too big for Py_ssize_t on 32-bit. "
+ "It doesn't matter though since you cannot create an array that long on 32-bit",
+ )
+ @pytest.mark.parametrize("dtype", ["int64", "int32"])
+ def test_is_range_indexer_big_n(self, dtype):
+ # GH53616
+ left = np.arange(0, 100, dtype=dtype)
+
+ assert not lib.is_range_indexer(left, 2**31)
+
@pytest.mark.parametrize("dtype", ["int64", "int32"])
def test_is_range_indexer_not_equal(self, dtype):
# GH#50592
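lib.is_range_indexer answers whether an indexer is exactly [0, 1, ..., n - 1]; GH53616 was an overflow once n exceeded 32 bits. A pure-NumPy sketch of the intended contract (not the Cython implementation):

import numpy as np

def is_range_indexer(indexer: np.ndarray, n: int) -> bool:
    # The length check short-circuits, so arange(n) is never materialized
    # for a huge n such as 2**31.
    return len(indexer) == n and bool((indexer == np.arange(n)).all())

assert is_range_indexer(np.arange(100), 100)
assert not is_range_indexer(np.arange(100), 2**31)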
diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py
index d4624cfc74872..faf8278675566 100644
--- a/pandas/tests/plotting/test_common.py
+++ b/pandas/tests/plotting/test_common.py
@@ -40,3 +40,22 @@ def test__gen_two_subplots_with_ax(self):
subplot_geometry = list(axes[0].get_subplotspec().get_geometry()[:-1])
subplot_geometry[-1] += 1
assert subplot_geometry == [2, 1, 2]
+
+ def test_colorbar_layout(self):
+ fig = self.plt.figure()
+
+ axes = fig.subplot_mosaic(
+ """
+ AB
+ CC
+ """
+ )
+
+ x = [1, 2, 3]
+ y = [1, 2, 3]
+
+ cs0 = axes["A"].scatter(x, y)
+ axes["B"].scatter(x, y)
+
+ fig.colorbar(cs0, ax=[axes["A"], axes["B"]], location="right")
+ DataFrame(x).plot(ax=axes["C"])
diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py
index afb4dd7422114..fb7868fd63fc3 100644
--- a/pandas/tests/scalar/timestamp/test_timestamp.py
+++ b/pandas/tests/scalar/timestamp/test_timestamp.py
@@ -14,6 +14,10 @@
tzlocal,
tzutc,
)
+from hypothesis import (
+ given,
+ strategies as st,
+)
import numpy as np
import pytest
import pytz
@@ -223,6 +227,39 @@ def test_resolution(self):
assert dt.as_unit("ms").resolution == Timedelta(milliseconds=1)
assert dt.as_unit("s").resolution == Timedelta(seconds=1)
+ @pytest.mark.parametrize(
+ "date_string, expected",
+ [
+ ("0000-2-29", 1),
+ ("0000-3-1", 2),
+ ("1582-10-14", 3),
+ ("-0040-1-1", 4),
+ ("2023-06-18", 6),
+ ],
+ )
+ def test_dow_historic(self, date_string, expected):
+ # GH 53738
+ ts = Timestamp(date_string)
+ dow = ts.weekday()
+ assert dow == expected
+
+ @given(
+ ts=st.datetimes(),
+ sign=st.sampled_from(["-", ""]),
+ )
+ def test_dow_parametric(self, ts, sign):
+ # GH 53738
+ ts = (
+ f"{sign}{str(ts.year).zfill(4)}"
+ f"-{str(ts.month).zfill(2)}"
+ f"-{str(ts.day).zfill(2)}"
+ )
+ result = Timestamp(ts).weekday()
+ expected = (
+ (np.datetime64(ts) - np.datetime64("1970-01-01")).astype("int64") - 4
+ ) % 7
+ assert result == expected
+
class TestTimestamp:
def test_default_to_stdlib_utc(self):
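The parametric test cross-checks Timestamp.weekday against a raw day count: 1970-01-01 (day 0 of the Unix epoch) fell on a Thursday, weekday 3, so the day of week of any proleptic Gregorian date is (days_since_epoch - 4) % 7. Worked through for one row of the table above:

import numpy as np
import pandas as pd

ts = "1582-10-14"
days = (np.datetime64(ts) - np.datetime64("1970-01-01")).astype("int64")
assert (days - 4) % 7 == pd.Timestamp(ts).weekday() == 3  # a Thursday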
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index d91cd6a43daea..b0f5093e4951d 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -240,3 +240,11 @@ def test_convert_dtype_object_with_na_float(self, infer_objects, dtype):
result = ser.convert_dtypes(infer_objects=infer_objects)
expected = pd.Series([1.5, pd.NA], dtype=dtype)
tm.assert_series_equal(result, expected)
+
+ def test_convert_dtypes_pyarrow_to_np_nullable(self):
+ # GH 53648
+ pytest.importorskip("pyarrow")
+ ser = pd.Series(range(2), dtype="int32[pyarrow]")
+ result = ser.convert_dtypes(dtype_backend="numpy_nullable")
+ expected = pd.Series(range(2), dtype="Int32")
+ tm.assert_series_equal(result, expected)
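GH 53648 pins down that convert_dtypes with dtype_backend="numpy_nullable" maps a pyarrow-backed series onto the corresponding masked dtype. Sketch (requires pyarrow):

import pandas as pd

ser = pd.Series(range(2), dtype="int32[pyarrow]")
print(ser.convert_dtypes(dtype_backend="numpy_nullable").dtype)  # Int32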
diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py
index 0062d45cc68a0..7f706fc54897e 100644
--- a/pandas/tests/series/methods/test_describe.py
+++ b/pandas/tests/series/methods/test_describe.py
@@ -1,7 +1,7 @@
import numpy as np
import pytest
-from pandas.compat import is_numpy_dev
+from pandas.compat.numpy import np_version_gte1p25
from pandas.core.dtypes.common import (
is_complex_dtype,
@@ -166,7 +166,7 @@ def test_numeric_result_dtype(self, any_numeric_dtype):
dtype = "complex128" if is_complex_dtype(any_numeric_dtype) else None
ser = Series([0, 1], dtype=any_numeric_dtype)
- if dtype == "complex128" and is_numpy_dev:
+ if dtype == "complex128" and np_version_gte1p25:
with pytest.raises(
TypeError, match=r"^a must be an array of real numbers$"
):
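Background for the gate change: the TypeError on complex input comes from NumPy's percentile machinery, which rejects complex arrays as of the 1.25 release rather than only in dev builds, so a release check replaces the dev-build check. The gated behavior on NumPy >= 1.25:

import pandas as pd

ser = pd.Series([0, 1], dtype="complex128")
try:
    ser.describe()
except TypeError as err:
    print(err)  # a must be an array of real numbers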
diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py
index 9278d1b51e1aa..b94723b7cbddf 100644
--- a/pandas/tests/series/methods/test_equals.py
+++ b/pandas/tests/series/methods/test_equals.py
@@ -5,7 +5,7 @@
import pytest
from pandas._libs.missing import is_matching_na
-from pandas.compat import is_numpy_dev
+from pandas.compat.numpy import np_version_gte1p25
from pandas.core.dtypes.common import is_float
@@ -51,7 +51,7 @@ def test_equals_list_array(val):
cm = (
tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
- if isinstance(val, str) and not is_numpy_dev
+ if isinstance(val, str) and not np_version_gte1p25
else nullcontext()
)
with cm:
diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py
index 146ba2c42e0be..ecc5d3060c0a2 100644
--- a/pandas/tests/series/methods/test_nlargest.py
+++ b/pandas/tests/series/methods/test_nlargest.py
@@ -217,7 +217,12 @@ def test_nlargest_boolean(self, data, expected):
def test_nlargest_nullable(self, any_numeric_ea_dtype):
# GH#42816
dtype = any_numeric_ea_dtype
- arr = np.random.randn(10).astype(dtype.lower(), copy=False)
+ if dtype.startswith("UInt"):
+ # Can't cast from negative float to uint on some platforms
+ arr = np.random.randint(1, 10, 10)
+ else:
+ arr = np.random.randn(10)
+ arr = arr.astype(dtype.lower(), copy=False)
ser = Series(arr.copy(), dtype=dtype)
ser[1] = pd.NA
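The new UInt branch exists because casting a negative float to an unsigned integer is undefined behavior in C, so the result varies by platform. A quick demonstration of the hazard the test now sidesteps:

import numpy as np

vals = np.array([-1.5, 2.0])
print(vals.astype("uint8"))  # the -1.5 entry is platform-dependent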
diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py
index 2c427399c9cd5..e70fa148fe166 100644
--- a/pandas/tests/series/methods/test_reindex.py
+++ b/pandas/tests/series/methods/test_reindex.py
@@ -10,6 +10,7 @@
NaT,
Period,
PeriodIndex,
+ RangeIndex,
Series,
Timedelta,
Timestamp,
@@ -422,3 +423,14 @@ def test_reindexing_with_float64_NA_log():
result_log = np.log(s_reindex)
expected_log = Series([0, np.NaN, np.NaN], dtype=Float64Dtype())
tm.assert_series_equal(result_log, expected_log)
+
+
+@pytest.mark.parametrize("dtype", ["timedelta64", "datetime64"])
+def test_reindex_expand_nonnano_nat(dtype):
+ # GH 53497
+ ser = Series(np.array([1], dtype=f"{dtype}[s]"))
+ result = ser.reindex(RangeIndex(2))
+ expected = Series(
+ np.array([1, getattr(np, dtype)("nat", "s")], dtype=f"{dtype}[s]")
+ )
+ tm.assert_series_equal(result, expected)
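GH 53497: reindexing a non-nanosecond series with an expanding index must fill the new positions with NaT in the same unit. Sketch:

import numpy as np
import pandas as pd

ser = pd.Series(np.array([1], dtype="datetime64[s]"))
out = ser.reindex(pd.RangeIndex(2))
print(out.dtype)    # datetime64[s], the second resolution is preserved
print(out.iloc[1])  # NaT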
diff --git a/pyproject.toml b/pyproject.toml
index fd974e88fae08..8ea8cfa7c824c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,7 +56,7 @@ repository = 'https://github.com/pandas-dev/pandas'
matplotlib = "pandas:plotting._matplotlib"
[project.optional-dependencies]
-test = ['hypothesis>=6.34.2', 'pytest>=7.0.0', 'pytest-xdist>=2.2.0', 'pytest-asyncio>=0.17.0']
+test = ['hypothesis>=6.34.2', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0', 'pytest-asyncio>=0.17.0']
performance = ['bottleneck>=1.3.2', 'numba>=0.53.1', 'numexpr>=2.7.1']
computation = ['scipy>=1.7.1', 'xarray>=0.21.0']
fss = ['fsspec>=2021.07.0']
@@ -101,7 +101,7 @@ all = ['beautifulsoup4>=4.9.3',
'pymysql>=1.0.2',
'PyQt5>=5.15.1',
'pyreadstat>=1.1.2',
- 'pytest>=7.0.0',
+ 'pytest>=7.3.2',
'pytest-xdist>=2.2.0',
'pytest-asyncio>=0.17.0',
'python-snappy>=0.6.0',
@@ -143,7 +143,7 @@ parentdir_prefix = "pandas-"
skip = "cp36-* cp37-* pp37-* *-manylinux_i686 *_ppc64le *_s390x *-musllinux*"
build-verbosity = "3"
environment = {LDFLAGS="-Wl,--strip-all" }
-test-requires = "hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17"
+test-requires = "hypothesis>=6.34.2 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17"
test-command = "python {project}/ci/test_wheels.py"
[tool.cibuildwheel.macos]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 2d263a07d53f4..857f434e76657 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -4,7 +4,7 @@
pip
versioneer[toml]
cython==0.29.33
-pytest>=7.0.0
+pytest>=7.3.2
pytest-cov
pytest-xdist>=2.2.0
pytest-asyncio>=0.17
diff --git a/scripts/tests/conftest.py b/scripts/tests/conftest.py
index 496a5195bfc84..a7976102eab98 100644
--- a/scripts/tests/conftest.py
+++ b/scripts/tests/conftest.py
@@ -1,6 +1,8 @@
+# pyproject.toml defines addopts: --strict-data-files
+# strict-data-files is defined & used in pandas/conftest.py
def pytest_addoption(parser):
parser.addoption(
"--strict-data-files",
action="store_true",
- help="Unused. For compat with setup.cfg.",
+ help="Unused",
)
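The stub has to keep registering the flag because the repo-wide addopts in pyproject.toml pass --strict-data-files even when pytest runs from scripts/tests, where the option goes unused. In pandas/conftest.py the same option feeds a fixture along these lines (a sketch, not the verbatim source):

import pytest

@pytest.fixture
def strict_data_files(pytestconfig):
    # True when tests should fail on missing data files instead of skipping.
    return pytestconfig.getoption("--strict-data-files")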