1 /**
2 	Punycode converter. This module is based on the original implementation in RFC 3492, and the JavaScript implementation by Mathias Bynens.
3 
4 	License: Boost Software License 1.0
5 */
6 module punycode;
7 
8 
9 import std.algorithm;
10 import std.array;
11 import std.ascii;
12 import std.conv;
13 import std.exception;
14 import std.traits;
15 
16 
/**
	Converts a UTF string to a Punycode string.

	The ASCII code points of $(D str) are copied to the output first, followed
	by a '-' delimiter when at least one was copied. Every remaining code point
	is then appended as a variable-length sequence of the digits a-z and 0-9.

	Params:
		str = the UTF-8, UTF-16 or UTF-32 string to encode

	Returns:
		The Punycode form of $(D str), using the same string type as the input.

	Throws:
		PunycodeException if an arithmetic overflow occurred while encoding.

	Standards:
		$(LINK2 https://www.ietf.org/rfc/rfc3492.txt, RFC 3492)
*/
S punyEncode(S)(S str)
	if (isSomeString!S)
{
	// Maps a digit value (0 .. 35) to its basic code point: 'a'-'z', then '0'-'9'.
	static char encodeDigit(uint x)
	{
		if (x <= 25) return cast(char)('a' + x);
		else if (x <= 35) return cast(char)('0' + x - 26);
		assert(0);
	}

	auto dstr = str.to!dstring;

	// Copy the basic (ASCII) code points to the output in their original order.
	auto ret = appender!S;
	ret ~= dstr.filter!isASCII;
	assert(ret.data.length <= uint.max);

	// Number of code points of dstr that have been handled so far.
	auto handledLength = cast(uint)ret.data.length;
	immutable basicLength = handledLength;

	if (handledLength > 0) ret ~= '-';

	// Pure ASCII input: there is nothing left to encode.
	if (handledLength == dstr.length) return ret.data;

	import std.functional : not;
	// Sorted non-ASCII code points, used to find the next code point to emit.
	auto ms = (() @trusted => (cast(uint[])(dstr.filter!(not!isASCII).array)).sort!"a < b")();

	dchar n = initialN;
	uint delta = 0;
	uint bias = initialBias;
	while (handledLength < dstr.length)
	{
		// m becomes the smallest code point >= n that occurs in the input.
		dchar m = void;
		while ((m = ms.front) < n) ms.popFront();

		// The bound is tested by division so that the product below cannot
		// wrap around before it has been checked.
		enforce!PunycodeException(m - n <= (uint.max - delta) / (handledLength + 1), "Arithmetic overflow");
		delta += (m - n) * (handledLength + 1);

		n = m;

		foreach (immutable(dchar) c; dstr)
		{
			if (c < n)
			{
				enforce!PunycodeException(delta != uint.max, "Arithmetic overflow");
				delta++;
			}
			else if (c == n)
			{
				// Emit delta as a variable-length integer, least significant digit first.
				auto q = delta;

				for (auto k = base;;k += base)
				{
					// Per-digit threshold, clamped to the range tmin .. tmax.
					immutable t = k <= bias ? tmin :
						k >= bias + tmax ? tmax : k - bias;

					if (q < t) break;

					ret ~= encodeDigit(t + (q - t) % (base - t));
					q = (q - t) / (base - t);
				}

				ret ~= encodeDigit(q);

				bias = adaptBias(delta, handledLength + 1, handledLength == basicLength);
				delta = 0;
				handledLength++;
			}
		}
		delta++;
		n++;
	}

	return ret.data;
}
100 
/// The ASCII letters are kept in front, the encoded remainder follows the delimiter.
@safe pure
unittest
{
	immutable encoded = punyEncode("mañana");
	assert(encoded == "maana-pta");
}
107 
108 
/**
	Converts a Punycode string to a UTF string.

	Everything before the last '-' delimiter is taken over literally and must
	be ASCII. The digits after it are decoded into code points, which are
	inserted at the positions they encode.

	Params:
		str = the Punycode string to decode

	Returns:
		The decoded text, using the same string type as the input.

	Throws:
		PunycodeException if an arithmetic overflow occurred while decoding.

		InvalidPunycodeException if an invalid Punycode string was passed.

	Standards:
		$(LINK2 https://www.ietf.org/rfc/rfc3492.txt, RFC 3492)
*/
S punyDecode(S)(in S str)
	if (isSomeString!S)
{
	// Maps a basic code point to its digit value: 'A'-'Z' and 'a'-'z' give
	// 0 .. 25, '0'-'9' give 26 .. 35. Anything else is rejected.
	static uint decodeDigit(dchar c)
	{
		if (c.isUpper) return c - 'A';
		if (c.isLower) return c - 'a';
		if (c.isDigit) return c - '0' + 26;
		throw new InvalidPunycodeException("Invalid Punycode");
	}

	auto dstr = str.to!dstring;
	assert(dstr.length <= uint.max);

	dchar[] ret;

	dchar n = initialN;
	uint i = 0;
	uint bias = initialBias;

	import std.string : lastIndexOf;
	immutable delimIdx = dstr.lastIndexOf('-');
	if (delimIdx != -1)
	{
		// The part before the last delimiter is copied verbatim and must be ASCII.
		enforce!InvalidPunycodeException(dstr[0 .. delimIdx].all!isASCII, "Invalid Punycode");
		ret = dstr[0 .. delimIdx].dup;
	}

	// Digits start right after the delimiter. Without a delimiter, or with one
	// at index 0, decoding starts at the very beginning of the input.
	size_t idx = delimIdx > 0 ? delimIdx + 1 : 0;

	while (idx < dstr.length)
	{
		immutable oldi = i;
		uint w = 1;

		// Decode one variable-length integer and add it to i.
		for (auto k = base;;k += base)
		{
			// The input ended in the middle of a variable-length integer.
			enforce!InvalidPunycodeException(idx < dstr.length, "Invalid Punycode");

			immutable digit = decodeDigit(dstr[idx]);
			idx++;

			// The bound is tested by division so that digit * w cannot wrap
			// around before it has been checked (w is always >= 1).
			enforce!PunycodeException(digit <= (uint.max - i) / w, "Arithmetic overflow");
			i += digit * w;

			// Per-digit threshold, clamped to the range tmin .. tmax.
			immutable t = k <= bias ? tmin :
				k >= bias + tmax ? tmax : k - bias;
			if (digit < t) break;

			enforce!PunycodeException(w <= uint.max / (base - t), "Arithmetic overflow");
			w *= base - t;
		}

		enforce!PunycodeException(ret.length < uint.max-1, "Arithmetic overflow");

		// Output length after the pending insertion; fits in uint per the check above.
		immutable len = cast(uint) ret.length + 1;

		bias = adaptBias(i - oldi, len, oldi == 0);

		// i wraps around the output length; every wrap advances n by one.
		enforce!PunycodeException(i / len <= uint.max - n, "Arithmetic overflow");
		n += i / len;

		i %= len;

		(() @trusted => ret.insertInPlace(i, n))();

		i++;
	}

	return ret.to!S;
}
189 
/// Decoding restores the non-ASCII code point at its original position.
@safe pure
unittest
{
	immutable decoded = punyDecode("maana-pta");
	assert(decoded == "mañana");
}
196 
197 
@safe pure
unittest
{
	// Verifies both directions of the conversion for one plain/encoded pair.
	static void checkRoundTrip(S)(S decoded, S encoded)
	{
		auto actualEncoded = punyEncode(decoded);
		assert(actualEncoded == encoded);

		auto actualDecoded = punyDecode(encoded);
		assert(actualDecoded == decoded);
	}

	// Every case is evaluated at compile time and again at run time.
	assertCTFEable!({
		// Empty and ASCII-only input.
		checkRoundTrip("", "");
		checkRoundTrip("ASCII0123", "ASCII0123-");

		// Mixed input in all three string widths.
		checkRoundTrip("Punycodeぴゅにこーど", "Punycode-p73grhua1i6jv5d");
		checkRoundTrip("Punycodeぴゅにこーど"w, "Punycode-p73grhua1i6jv5d"w);
		checkRoundTrip("Punycodeぴゅにこーど"d, "Punycode-p73grhua1i6jv5d"d);

		// Input without any ASCII code point.
		checkRoundTrip("ぴゅにこーど", "28j1be9azfq9a");
		checkRoundTrip("他们为什么不说中文", "ihqwcrb4cv8a8dqg056pqjye");

		// Input that itself contains the delimiter character.
		checkRoundTrip("☃-⌘", "--dqo34k");
		checkRoundTrip("-> $1.00 <-", "-> $1.00 <--");

		// Malformed input has to be rejected.
		assertThrown!InvalidPunycodeException(punyDecode("aaa-*"));
		assertThrown!InvalidPunycodeException(punyDecode("aaa-p73grhua1i6jv5dd"));
		assertThrown!InvalidPunycodeException(punyDecode("ü-"));

		// Overflow is reported with a dedicated message.
		assert(collectExceptionMsg(punyDecode("aaa-99999999")) == "Arithmetic overflow");
	});
}
223 
224 
/**
	Base class of the exceptions raised by the punycode module.
*/
class PunycodeException : Exception
{
	/// Forwards the message, source location and chained throwable to Exception.
	@safe pure nothrow
	this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
	{
		super(msg, file, line, next);
	}
}
236 
237 
/**
	Exception raised when the string handed to the decoder is not valid Punycode.
*/
class InvalidPunycodeException : PunycodeException
{
	/// Forwards the message, source location and chained throwable to PunycodeException.
	@safe pure nothrow
	this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
	{
		super(msg, file, line, next);
	}
}
247 
248 
249 private:
250 
251 
// Parameters shared by the encoder and the decoder. The values are presumably
// the Punycode parameters of RFC 3492, which the module header cites.

// Number of distinct digit values (a-z and 0-9); also the step of the digit counter k.
enum base = 36;
// Starting value of the code point counter n.
enum initialN = 0x80;
// Starting value of bias.
enum initialBias = 72;
// Lower clamp of the per-digit threshold t.
enum tmin = 1;
// Upper clamp of the per-digit threshold t.
enum tmax = 26;
// Divisor applied to delta on the first bias adaptation (later ones divide by 2).
enum damp = 700;
// Added to delta in the denominator of the final term of the bias adaptation.
enum skew = 38;
259 
260 
/*
	Computes the bias to use for the next variable-length integer.

	delta is scaled down (by damp on the first call, by 2 afterwards), increased
	by its share per code point handled so far, and then repeatedly divided
	until it is no larger than ((base - tmin) * tmax) / 2.

	numpoints must not be zero, since it is used as a divisor.
*/
uint adaptBias(uint delta, uint numpoints, bool firsttime) @safe pure nothrow /+@nogc+/
{
	enum uint span = base - tmin;
	enum uint threshold = (span * tmax) / 2;

	uint scaled = delta / (firsttime ? damp : 2);
	scaled += scaled / numpoints;

	// Each division by span accounts for one further full digit.
	uint offset = 0;
	for (; scaled > threshold; offset += base)
		scaled /= span;

	return offset + (span + 1) * scaled / (scaled + skew);
}
275 
// Test helper: runs f once during compilation and once more at run time.
version (unittest)
{
	void assertCTFEable(alias f)()
	{
		// Initializing an enum forces the call to be evaluated at compile time.
		enum bool ranAtCompileTime = { f(); return true; }();
		static assert(ranAtCompileTime);

		f();
	}
}