clipboards.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. """ io on the clipboard """
  2. import warnings
  3. import pandas.compat as compat
  4. from pandas.compat import PY2, PY3, StringIO
  5. from pandas.core.dtypes.generic import ABCDataFrame
  6. from pandas import get_option, option_context
  7. def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
  8. r"""
  9. Read text from clipboard and pass to read_csv. See read_csv for the
  10. full argument list
  11. Parameters
  12. ----------
  13. sep : str, default '\s+'
  14. A string or regex delimiter. The default of '\s+' denotes
  15. one or more whitespace characters.
  16. Returns
  17. -------
  18. parsed : DataFrame
  19. """
  20. encoding = kwargs.pop('encoding', 'utf-8')
  21. # only utf-8 is valid for passed value because that's what clipboard
  22. # supports
  23. if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
  24. raise NotImplementedError(
  25. 'reading from clipboard only supports utf-8 encoding')
  26. from pandas.io.clipboard import clipboard_get
  27. from pandas.io.parsers import read_csv
  28. text = clipboard_get()
  29. # try to decode (if needed on PY3)
  30. # Strange. linux py33 doesn't complain, win py33 does
  31. if PY3:
  32. try:
  33. text = compat.bytes_to_str(
  34. text, encoding=(kwargs.get('encoding') or
  35. get_option('display.encoding'))
  36. )
  37. except AttributeError:
  38. pass
  39. # Excel copies into clipboard with \t separation
  40. # inspect no more then the 10 first lines, if they
  41. # all contain an equal number (>0) of tabs, infer
  42. # that this came from excel and set 'sep' accordingly
  43. lines = text[:10000].split('\n')[:-1][:10]
  44. # Need to remove leading white space, since read_csv
  45. # accepts:
  46. # a b
  47. # 0 1 2
  48. # 1 3 4
  49. counts = {x.lstrip().count('\t') for x in lines}
  50. if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
  51. sep = '\t'
  52. # Edge case where sep is specified to be None, return to default
  53. if sep is None and kwargs.get('delim_whitespace') is None:
  54. sep = r'\s+'
  55. # Regex separator currently only works with python engine.
  56. # Default to python if separator is multi-character (regex)
  57. if len(sep) > 1 and kwargs.get('engine') is None:
  58. kwargs['engine'] = 'python'
  59. elif len(sep) > 1 and kwargs.get('engine') == 'c':
  60. warnings.warn('read_clipboard with regex separator does not work'
  61. ' properly with c engine')
  62. # In PY2, the c table reader first encodes text with UTF-8 but Python
  63. # table reader uses the format of the passed string. For consistency,
  64. # encode strings for python engine so that output from python and c
  65. # engines produce consistent results
  66. if kwargs.get('engine') == 'python' and PY2:
  67. text = text.encode('utf-8')
  68. return read_csv(StringIO(text), sep=sep, **kwargs)
  69. def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
  70. """
  71. Attempt to write text representation of object to the system clipboard
  72. The clipboard can be then pasted into Excel for example.
  73. Parameters
  74. ----------
  75. obj : the object to write to the clipboard
  76. excel : boolean, defaults to True
  77. if True, use the provided separator, writing in a csv
  78. format for allowing easy pasting into excel.
  79. if False, write a string representation of the object
  80. to the clipboard
  81. sep : optional, defaults to tab
  82. other keywords are passed to to_csv
  83. Notes
  84. -----
  85. Requirements for your platform
  86. - Linux: xclip, or xsel (with gtk or PyQt4 modules)
  87. - Windows:
  88. - OS X:
  89. """
  90. encoding = kwargs.pop('encoding', 'utf-8')
  91. # testing if an invalid encoding is passed to clipboard
  92. if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
  93. raise ValueError('clipboard only supports utf-8 encoding')
  94. from pandas.io.clipboard import clipboard_set
  95. if excel is None:
  96. excel = True
  97. if excel:
  98. try:
  99. if sep is None:
  100. sep = '\t'
  101. buf = StringIO()
  102. # clipboard_set (pyperclip) expects unicode
  103. obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
  104. text = buf.getvalue()
  105. if PY2:
  106. text = text.decode('utf-8')
  107. clipboard_set(text)
  108. return
  109. except TypeError:
  110. warnings.warn('to_clipboard in excel mode requires a single '
  111. 'character separator.')
  112. elif sep is not None:
  113. warnings.warn('to_clipboard with excel=False ignores the sep argument')
  114. if isinstance(obj, ABCDataFrame):
  115. # str(df) has various unhelpful defaults, like truncation
  116. with option_context('display.max_colwidth', 999999):
  117. objstr = obj.to_string(**kwargs)
  118. else:
  119. objstr = str(obj)
  120. clipboard_set(objstr)