Skip to content

BUG: to_clipboard text truncated for Python 3 on Windows for UTF-16 text #25040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
@@ -163,6 +163,7 @@ MultiIndex
I/O
^^^

- Fixed bug where :meth:`to_clipboard` truncated the copied text when it contained UTF-16 characters in Python 3 on Windows (:issue:`25040`)
-
-
-
11 changes: 8 additions & 3 deletions pandas/io/clipboard/windows.py
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@ def init_windows_clipboard():
HINSTANCE, HMENU, BOOL, UINT, HANDLE)

windll = ctypes.windll
msvcrt = ctypes.CDLL('msvcrt')

safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA)
safeCreateWindowExA.argtypes = [DWORD, LPCSTR, LPCSTR, DWORD, INT, INT,
@@ -71,6 +72,10 @@ def init_windows_clipboard():
safeGlobalUnlock.argtypes = [HGLOBAL]
safeGlobalUnlock.restype = BOOL

wcslen = CheckedCall(msvcrt.wcslen)
wcslen.argtypes = [c_wchar_p]
wcslen.restype = UINT

GMEM_MOVEABLE = 0x0002
CF_UNICODETEXT = 13

@@ -129,13 +134,13 @@ def copy_windows(text):
# If the hMem parameter identifies a memory object,
# the object must have been allocated using the
# function with the GMEM_MOVEABLE flag.
count = len(text) + 1
count = wcslen(text) + 1
handle = safeGlobalAlloc(GMEM_MOVEABLE,
count * sizeof(c_wchar))
locked_handle = safeGlobalLock(handle)

ctypes.memmove(c_wchar_p(locked_handle),
c_wchar_p(text), count * sizeof(c_wchar))
ctypes.memmove(c_wchar_p(locked_handle), c_wchar_p(text),
count * sizeof(c_wchar))

safeGlobalUnlock(handle)
safeSetClipboardData(CF_UNICODETEXT, handle)
20 changes: 18 additions & 2 deletions pandas/tests/io/test_clipboard.py
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
from pandas.util import testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf

from pandas.io.clipboard import clipboard_get, clipboard_set
from pandas.io.clipboard.exceptions import PyperclipException

try:
@@ -30,8 +31,8 @@ def build_kwargs(sep, excel):
return kwargs


@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
'colwidth', 'mixed', 'float', 'int'])
@pytest.fixture(params=['delims', 'utf8', 'utf16', 'string', 'long',
'nonascii', 'colwidth', 'mixed', 'float', 'int'])
def df(request):
data_type = request.param

@@ -41,6 +42,10 @@ def df(request):
elif data_type == 'utf8':
return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
'b': ['øπ∆˚¬', 'œ∑´®']})
elif data_type == 'utf16':
return pd.DataFrame({'a': ['\U0001f44d\U0001f44d',
'\U0001f44d\U0001f44d'],
'b': ['abc', 'def']})
elif data_type == 'string':
return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
@@ -225,3 +230,14 @@ def test_invalid_encoding(self, df):
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
self.check_round_trip_frame(df, encoding=enc)


@pytest.mark.single
@pytest.mark.clipboard
@pytest.mark.skipif(not _DEPS_INSTALLED,
reason="clipboard primitives not installed")
@pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...'])
def test_raw_roundtrip(data):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reference PR number as a comment below this function definition.

# PR #25040: wide unicode wasn't copied correctly on PY3 on Windows
clipboard_set(data)
assert data == clipboard_get()