1 /**
2 	Punycode converter.
3 */
4 module punycode;
5 
6 
7 import std.algorithm;
8 import std.array;
9 import std.ascii;
10 import std.conv;
11 import std.exception;
12 import std.traits;
13 
14 
/**
	Converts a UTF string to a Punycode string (RFC 3492).

	The ASCII characters of `str` are copied to the output first, in their
	original order. If at least one was copied, a `'-'` delimiter follows.
	Every non-ASCII code point is then encoded as a sequence of base-36
	digits (`a`-`z`, `0`-`9`).

	Params:
		str = the text to encode

	Returns:
		The Punycode form of `str`, using the same string type as the input.

	Throws:
		`Exception` with the message "Overflow occured" if an intermediate
		delta value does not fit into a `uint`.
*/
S punyEncode(S)(S str)
	if (isSomeString!S)
{
	// Maps a digit value in 0 .. 35 to its character: 0-25 => 'a'-'z', 26-35 => '0'-'9'.
	static char encodeDigit(uint x)
	{
		if (x <= 25) return cast(char)('a' + x);
		else if (x <= 35) return cast(char)('0' + x - 26);
		assert(0);
	}

	// Work on code points so that every element is one character.
	auto dstr = str.to!dstring;

	auto ret = appender!S;

	// The basic (ASCII) code points go to the output unchanged.
	ret ~= dstr.filter!isASCII;
	assert(ret.data.length <= uint.max);

	// Number of code points of dstr that have been written so far.
	auto handledLength = cast(uint)ret.data.length;
	immutable basicLength = handledLength;

	if (handledLength > 0) ret ~= '-';

	// Pure ASCII (or empty) input: nothing left to encode.
	if (handledLength == dstr.length) return ret.data;

	import std.functional : not;
	// Sorted non-ASCII code points; the front is consumed to find the next
	// code point to handle.
	auto ms = (() @trusted => (cast(uint[])(dstr.filter!(not!isASCII).array)).sort!"a < b")();

	dchar n = initialN;
	uint delta = 0;
	uint bias = initialBias;
	while (handledLength < dstr.length)
	{
		// m = smallest not yet handled code point (>= n).
		dchar m = void;
		while ((m = ms.front) < n) ms.popFront();

		// Check by division: computing the product first could wrap around
		// in uint arithmetic and let an overflowing value pass the test.
		enforce(m - n <= (uint.max - delta) / (handledLength + 1), "Overflow occured");
		delta += (m - n) * (handledLength + 1);

		n = m;

		foreach (immutable(dchar) c; dstr)
		{
			if (c < n)
			{
				enforce(delta != uint.max, "Overflow occured");
				delta++;
			}
			else if (c == n)
			{
				// Emit delta as a variable-length integer.
				auto q = delta;

				for (auto k = base;;k += base)
				{
					// Threshold for this digit position, clamped to tmin .. tmax.
					immutable t = k <= bias ? tmin :
						k >= bias + tmax ? tmax : k - bias;

					if (q < t) break;

					ret ~= encodeDigit(t + (q - t) % (base - t));
					q = (q - t) / (base - t);
				}

				// Final (most significant) digit.
				ret ~= encodeDigit(q);

				bias = adaptBias(delta, handledLength + 1, handledLength == basicLength);
				delta = 0;
				handledLength++;
			}
		}
		delta++;
		n++;
	}

	return ret.data;
}
93 
///
/+pure+/ @safe
unittest
{
	// A single non-ASCII character: the ASCII part comes first, then '-',
	// then the encoded position and value of 'ñ'.
	immutable encoded = punyEncode("mañana");
	assert(encoded == "maana-pta");
}
100 
101 
/**
	Converts a Punycode string to a UTF string (RFC 3492).

	Everything before the last `'-'` in `str` is taken as literal ASCII
	text. The remainder is read as base-36 digits (`a`-`z` or `A`-`Z` for
	0-25, `0`-`9` for 26-35), and each decoded value inserts one code point
	into the result.

	Params:
		str = the Punycode text to decode

	Returns:
		The decoded text, using the same string type as the input.

	Throws:
		`Exception` with the message "Invalid Punycode" if the literal part
		contains non-ASCII characters, if a character is not a valid digit,
		or if the input ends in the middle of a number; `Exception` with the
		message "Overflow occured" if a decoded value does not fit into a
		`uint`.
*/
S punyDecode(S)(in S str)
	if (isSomeString!S)
{
	// Maps a digit character to its value; letters are case-insensitive.
	static uint decodeDigit(dchar c)
	{
		if (c.isUpper) return c - 'A';
		if (c.isLower) return c - 'a';
		if (c.isDigit) return c - '0' + 26;
		throw new Exception("Invalid Punycode");
	}

	auto dstr = str.to!dstring;
	assert(dstr.length <= uint.max);

	dchar[] ret;

	dchar n = initialN;
	uint i = 0;
	uint bias = initialBias;

	import std.string : lastIndexOf;
	// Copy the literal part (everything before the last delimiter), if any.
	immutable delimIdx = dstr.lastIndexOf('-');
	if (delimIdx != -1)
	{
		enforce(dstr[0 .. delimIdx].all!isASCII, "Invalid Punycode");
		ret = dstr[0 .. delimIdx].dup;
	}

	// Digits start after the delimiter. With no delimiter, or an empty
	// literal part, reading starts at the beginning of the input.
	auto idx = (delimIdx == -1 || delimIdx == 0) ? 0 : delimIdx + 1;

	while (idx < dstr.length)
	{
		// Decode one variable-length integer into i.
		immutable oldi = i;
		uint w = 1;

		for (auto k = base;;k += base)
		{
			enforce(idx < dstr.length, "Invalid Punycode");

			immutable digit = decodeDigit(dstr[idx]);
			idx++;

			// Check by division: computing digit * w first could wrap
			// around in uint arithmetic and let an overflowing value pass.
			enforce(digit <= (uint.max - i) / w, "Overflow occured");
			i += digit * w;

			// Threshold for this digit position, clamped to tmin .. tmax.
			immutable t = k <= bias ? tmin :
				k >= bias + tmax ? tmax : k - bias;
			if (digit < t) break;

			enforce(w <= uint.max / (base - t), "Overflow occured");
			w *= base - t;
		}

		enforce(ret.length < uint.max-1, "Overflow occured");

		bias = adaptBias(i - oldi, cast(uint) ret.length + 1, oldi == 0);

		// i wraps around the output length: the quotient advances the code
		// point n, the remainder is the insertion position.
		enforce(i / (ret.length + 1) <= uint.max - n, "Overflow occured");
		n += i / (ret.length + 1);

		i %= ret.length + 1;

		(() @trusted => ret.insertInPlace(i, n))();

		i++;
	}

	return ret.to!S;
}
174 
///
/+pure+/ @safe
unittest
{
	// The text before the last '-' is literal; "pta" places 'ñ' at index 2.
	immutable decoded = punyDecode("maana-pta");
	assert(decoded == "mañana");
}
181 
182 
/+pure+/ @safe
unittest
{
	// Verifies that `plain` encodes to `punycode` and that `punycode`
	// decodes back to `plain`.
	static void assertConvertible(S)(S plain, S punycode)
	{
		immutable encoded = punyEncode(plain);
		assert(encoded == punycode, "punyEncode");

		immutable decoded = punyDecode(punycode);
		assert(decoded == plain, "punyDecode");
	}

	// Every case runs both during compilation (CTFE) and at run time.
	assertCTFEable!({
		// Empty and pure-ASCII input.
		assertConvertible("", "");
		assertConvertible("ASCII0123", "ASCII0123-");

		// Mixed ASCII and non-ASCII, for each string width.
		assertConvertible("Punycodeぴゅにこーど", "Punycode-p73grhua1i6jv5d");
		assertConvertible("Punycodeぴゅにこーど"w, "Punycode-p73grhua1i6jv5d"w);
		assertConvertible("Punycodeぴゅにこーど"d, "Punycode-p73grhua1i6jv5d"d);

		// Input without any ASCII characters.
		assertConvertible("ぴゅにこーど", "28j1be9azfq9a");
		assertConvertible("他们为什么不说中文", "ihqwcrb4cv8a8dqg056pqjye");

		// Input that itself contains the '-' delimiter character.
		assertConvertible("☃-⌘", "--dqo34k");
		assertConvertible("-> $1.00 <-", "-> $1.00 <--");

		// Malformed input must be rejected.
		assertThrown(punyDecode("aaa-*"));
		assertThrown(punyDecode("aaa-p73grhua1i6jv5dd"));
	});
}
206 
207 
208 private:
209 
210 
// Punycode parameters (the values given in RFC 3492, section 5).

// Number of distinct digit values; digits are 'a'-'z' (0-25) and '0'-'9' (26-35).
enum base = 36;
// Starting value of the code point counter `n`: the first non-ASCII code point.
enum initialN = 0x80;
// Bias used before the first call to adaptBias.
enum initialBias = 72;
// Lower bound of the per-digit threshold `t`.
enum tmin = 1;
// Upper bound of the per-digit threshold `t`.
enum tmax = 26;
// Divisor applied to delta by adaptBias on its first call (`firsttime`).
enum damp = 700;
// Added to delta in the denominator of adaptBias's result.
enum skew = 38;
218 
/*
	Computes the bias to use for the next variable-length integer.

	Params:
		delta     = the delta that was just encoded or decoded
		numpoints = number of code points handled so far, including the current one
		firsttime = true for the first adaptation, which divides delta by
		            `damp` instead of by 2

	Returns:
		The new bias value.
*/
uint adaptBias(uint delta, uint numpoints, bool firsttime) pure @safe nothrow /+@nogc+/
{
	enum divisor = base - tmin;
	enum threshold = (divisor * tmax) / 2;

	// Scale delta down, then compensate for the longer string.
	if (firsttime)
		delta /= damp;
	else
		delta /= 2;
	delta += delta / numpoints;

	// Each division of delta accounts for one more digit position.
	uint offset = 0;
	for (; delta > threshold; offset += base)
		delta /= divisor;

	return offset + (divisor + 1) * delta / (delta + skew);
}
233 
version (unittest)
{
	// Test helper: evaluates `f` once during compilation, so that it must be
	// usable in CTFE, and once more at run time.
	void assertCTFEable(alias f)()
	{
		// Compile-time pass.
		static assert(
			{
				f();
				return true;
			}()
		);

		// Run-time pass.
		f();
	}
}