codec.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. from .core import encode, decode, alabel, ulabel, IDNAError
  2. import codecs
  3. import re
  # Character class of the dot variants this module splits labels on.
  _unicode_dots_re = re.compile(
      u'['
      u'\u002e'  # FULL STOP
      u'\u3002'  # IDEOGRAPHIC FULL STOP
      u'\uff0e'  # FULLWIDTH FULL STOP
      u'\uff61'  # HALFWIDTH IDEOGRAPHIC FULL STOP
      u']'
  )
  class Codec(codecs.Codec):
      """Stateless IDNA codec delegating to the package's ``encode``/``decode``.

      Only the ``'strict'`` error handler is accepted; any other value makes
      both methods raise ``IDNAError``.
      """

      def encode(self, data, errors='strict'):
          """Encode a complete domain name.

          Returns an ``(encoded, consumed)`` tuple in which ``consumed`` is
          ``len(data)``. Empty input short-circuits to ``(b"", 0)``.

          Raises ``IDNAError`` when ``errors`` is not ``'strict'``.
          """
          if errors != 'strict':
              raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

          if not data:
              # A codec encoder hands back a byte string. b"" is bytes on
              # Python 3 and the very same object type as "" on Python 2.
              return b"", 0

          return encode(data), len(data)

      def decode(self, data, errors='strict'):
          """Decode a complete domain name.

          Returns a ``(decoded, consumed)`` tuple in which ``consumed`` is
          ``len(data)``. Empty input short-circuits to ``(u"", 0)``.

          Raises ``IDNAError`` when ``errors`` is not ``'strict'``.
          """
          if errors != 'strict':
              raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

          if not data:
              return u"", 0

          return decode(data), len(data)
  class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
      """Incremental IDNA encoder that only emits labels known to be complete."""

      def _buffer_encode(self, data, errors, final):
          """Encode the finished labels at the front of ``data``.

          ``data`` is split on the separators in ``_unicode_dots_re``. Unless
          ``final`` is true, a last label that is not followed by a separator
          is left unconsumed so the base class can buffer it for the next call.

          Returns ``(encoded, consumed)``: the encoded labels joined with
          ``b"."`` and the number of input characters they account for.

          Raises ``IDNAError`` when ``errors`` is not ``'strict'``.
          """
          if errors != 'strict':
              raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

          if not data:
              return (b"", 0)

          labels = _unicode_dots_re.split(data)
          # Byte literals throughout: encoder output must be bytes on Python 3,
          # and b'' is the ordinary str type on Python 2.
          trailing_dot = b''
          if labels:
              if not labels[-1]:
                  # Input ended on a separator, so the empty tail is dropped
                  # and the separator is re-emitted after the joined labels.
                  trailing_dot = b'.'
                  del labels[-1]
              elif not final:
                  # Keep potentially unfinished label until the next call
                  del labels[-1]
                  if labels:
                      trailing_dot = b'.'

          result = []
          size = 0
          for label in labels:
              # NOTE(review): assumes alabel() returns an ASCII byte string;
              # confirm against .core.
              result.append(alabel(label))
              if size:
                  size += 1  # the separator consumed between two labels
              size += len(label)

          # Join with U+002E
          result = b".".join(result) + trailing_dot
          size += len(trailing_dot)
          return (result, size)
  class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
      """Incremental IDNA decoder that only emits labels known to be complete."""

      def _buffer_decode(self, data, errors, final):
          """Decode the finished labels at the front of ``data``.

          ``data`` may be text or an ASCII byte string. Unless ``final`` is
          true, a last label that is not followed by a separator is left
          unconsumed so the base class can buffer it for the next call.

          Returns ``(decoded, consumed)``: the decoded labels joined with
          ``u"."`` and the number of input items they account for.

          Raises ``IDNAError`` when ``errors`` is not ``'strict'`` and
          ``UnicodeDecodeError`` when byte input is not pure ASCII.
          """
          if errors != 'strict':
              raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

          if not data:
              return (u"", 0)

          # IDNA allows decoding to operate on Unicode strings, too.
          # type(u'') is the text type on both Python 2 and Python 3, which
          # avoids naming the Python 2-only builtin ``unicode``.
          if not isinstance(data, type(u'')):
              # Must be ASCII string; one ASCII byte maps to one character, so
              # the consumed count below is valid for the byte input as well.
              data = bytes(data).decode("ascii")
          labels = _unicode_dots_re.split(data)

          trailing_dot = u''
          if labels:
              if not labels[-1]:
                  # Input ended on a separator, so the empty tail is dropped
                  # and the separator is re-emitted after the joined labels.
                  trailing_dot = u'.'
                  del labels[-1]
              elif not final:
                  # Keep potentially unfinished label until the next call
                  del labels[-1]
                  if labels:
                      trailing_dot = u'.'

          result = []
          size = 0
          for label in labels:
              result.append(ulabel(label))
              if size:
                  size += 1  # the separator consumed between two labels
              size += len(label)

          result = u".".join(result) + trailing_dot
          size += len(trailing_dot)
          return (result, size)
  class StreamWriter(Codec, codecs.StreamWriter):
      """Stream writer whose ``encode``/``decode`` resolve to ``Codec`` first."""
  class StreamReader(Codec, codecs.StreamReader):
      """Stream reader whose ``encode``/``decode`` resolve to ``Codec`` first."""
  def getregentry():
      """Return the ``codecs.CodecInfo`` record for this module's codec.

      The stateless ``encode`` and ``decode`` callables are bound methods of
      two separate ``Codec`` instances; the incremental and stream entries are
      the classes themselves.
      """
      entry = {'name': 'idna'}
      entry['encode'] = Codec().encode
      entry['decode'] = Codec().decode
      entry['incrementalencoder'] = IncrementalEncoder
      entry['incrementaldecoder'] = IncrementalDecoder
      entry['streamwriter'] = StreamWriter
      entry['streamreader'] = StreamReader
      return codecs.CodecInfo(**entry)