/**
Punycode converter.

Provides `punyEncode` and `punyDecode`, which convert between UTF strings and
their Punycode representation using the parameters declared at the bottom of
this module (base 36, initial bias 72, initial code point 0x80, delimiter '-').
*/
module punycode;


import std.algorithm;
import std.array;
import std.ascii;
import std.conv;
import std.exception;
import std.traits;


/**
Converts an UTF string to a Punycode string.

Params:
    str = the string to encode (any string type accepted by `isSomeString`)

Returns:
    The ASCII code points of `str` in their original order, followed by a '-'
    delimiter when at least one ASCII code point is present, followed by the
    variable-length-integer encoding of every non-ASCII code point. An input
    consisting only of ASCII characters yields the input plus a trailing '-';
    an empty input yields an empty string.

Throws:
    `Exception` with the message "Overflow occured" when the running delta
    would not fit into a `uint`.
*/
S punyEncode(S)(S str)
    if (isSomeString!S)
{
    // Maps a digit value 0 .. 35 to 'a' .. 'z', '0' .. '9'.
    static char encodeDigit(uint x)
    {
        if (x <= 25) return cast(char)('a' + x);
        else if (x <= 35) return cast(char)('0' + x - 26);
        assert(0);
    }

    auto dstr = str.to!dstring;

    auto ret = appender!S;

    // The basic (ASCII) code points are copied verbatim, in order.
    ret ~= dstr.filter!isASCII;
    assert(ret.data.length <= uint.max);

    auto handledLength = cast(uint)ret.data.length;
    immutable basicLength = handledLength;

    if (handledLength > 0) ret ~= '-';

    // Nothing but ASCII: there is no extended part to emit.
    if (handledLength == dstr.length) return ret.data;

    // Sorted list of the non-ASCII code points; its front is used to find the
    // smallest code point that has not been handled yet.
    import std.functional : not;
    auto ms = (() @trusted => (cast(uint[])(dstr.filter!(not!isASCII).array)).sort!"a < b")();

    dchar n = initialN;
    uint delta = 0;
    uint bias = initialBias;
    while (handledLength < dstr.length)
    {
        // Skip code points that were already emitted in earlier rounds.
        dchar m = void;
        while ((m = ms.front) < n) ms.popFront();

        // Guard `delta += (m - n) * (handledLength + 1)` against wrap-around.
        // The check is written as a division because the product itself is
        // computed in 32-bit arithmetic and could wrap before being compared.
        enforce(m - n <= (uint.max - delta) / (handledLength + 1), "Overflow occured");
        delta += (m - n) * (handledLength + 1);

        n = m;

        foreach (immutable(dchar) c; dstr)
        {
            if (c < n)
            {
                enforce(delta != uint.max, "Overflow occured");
                delta++;
            }
            else if (c == n)
            {
                // Emit delta as a generalized variable-length integer.
                auto q = delta;

                for (auto k = base;;k += base)
                {
                    immutable t = k <= bias ? tmin :
                                  k >= bias + tmax ? tmax : k - bias;

                    if (q < t) break;

                    ret ~= encodeDigit(t + (q - t) % (base - t));
                    q = (q - t) / (base - t);
                }

                ret ~= encodeDigit(q);

                bias = adaptBias(delta, cast(uint)handledLength + 1, handledLength == basicLength);
                delta = 0;
                handledLength++;
            }
        }
        delta++;
        n++;
    }

    return ret.data;
}

///
/+pure+/ @safe
unittest
{
    assert(punyEncode("mañana") == "maana-pta");
}


/**
Converts a Punycode string to an UTF string.

Params:
    str = the Punycode string to decode

Returns:
    The decoded string, converted to the same string type as the input.

Throws:
    `Exception` with the message "Invalid Punycode" when the part before the
    last '-' contains non-ASCII characters, when a character of the encoded
    part is not an ASCII letter or digit, or when the encoded part ends in the
    middle of a variable-length integer; `Exception` with the message
    "Overflow occured" when an intermediate value would not fit into a `uint`.
*/
S punyDecode(S)(in S str)
    if (isSomeString!S)
{
    // Maps 'A' .. 'Z' and 'a' .. 'z' to 0 .. 25 and '0' .. '9' to 26 .. 35.
    static uint decodeDigit(dchar c)
    {
        if (c.isUpper) return c - 'A';
        if (c.isLower) return c - 'a';
        if (c.isDigit) return c - '0' + 26;
        throw new Exception("Invalid Punycode");
    }

    auto dstr = str.to!dstring;
    assert(dstr.length <= uint.max);

    dchar[] ret;

    dchar n = initialN;
    uint i = 0;
    uint bias = initialBias;

    // Everything before the last delimiter is the literal ASCII part.
    import std.string : lastIndexOf;
    immutable delimIdx = dstr.lastIndexOf('-');
    if (delimIdx != -1)
    {
        enforce(dstr[0 .. delimIdx].all!isASCII, "Invalid Punycode");
        ret = dstr[0 .. delimIdx].dup;
    }

    // With no delimiter, or a delimiter at index 0, decoding starts at index 0;
    // otherwise it starts right after the delimiter.
    auto idx = (delimIdx == -1 || delimIdx == 0) ? 0 : delimIdx + 1;

    while (idx < dstr.length)
    {
        immutable oldi = i;
        uint w = 1;

        // Decode one generalized variable-length integer into `i`.
        for (auto k = base;;k += base)
        {
            enforce(idx < dstr.length, "Invalid Punycode");

            immutable digit = decodeDigit(dstr[idx]);
            idx++;

            // Guard `i += digit * w` against wrap-around. The check is written
            // as a division because the product itself is computed in 32-bit
            // arithmetic and could wrap before being compared. `w` is never
            // zero: it starts at 1 and is only multiplied by `base - t`, which
            // is at least `base - tmax`.
            enforce(digit <= (uint.max - i) / w, "Overflow occured");
            i += digit * w;

            immutable t = k <= bias ? tmin :
                          k >= bias + tmax ? tmax : k - bias;
            if (digit < t) break;

            enforce(w <= uint.max / (base - t), "Overflow occured");
            w *= base - t;
        }

        enforce(ret.length < uint.max-1, "Overflow occured");

        bias = adaptBias(i - oldi, cast(uint) ret.length + 1, oldi == 0);

        // `i` encodes both the code point increment and the insert position.
        enforce(i / (ret.length + 1) <= uint.max - n, "Overflow occured");
        n += i / (ret.length + 1);

        i %= ret.length + 1;

        (() @trusted => ret.insertInPlace(i, n))();

        i++;
    }

    return ret.to!S;
}

///
/+pure+/ @safe
unittest
{
    assert(punyDecode("maana-pta") == "mañana");
}


/+pure+/ @safe
unittest
{
    static void assertConvertible(S)(S plain, S punycode)
    {
        assert(punyEncode(plain) == punycode, "punyEncode");
        assert(punyDecode(punycode) == plain, "punyDecode");
    }

    assertCTFEable!({
        assertConvertible("", "");
        assertConvertible("ASCII0123", "ASCII0123-");
        assertConvertible("Punycodeぴゅにこーど", "Punycode-p73grhua1i6jv5d");
        assertConvertible("Punycodeぴゅにこーど"w, "Punycode-p73grhua1i6jv5d"w);
        assertConvertible("Punycodeぴゅにこーど"d, "Punycode-p73grhua1i6jv5d"d);
        assertConvertible("ぴゅにこーど", "28j1be9azfq9a");
        assertConvertible("他们为什么不说中文", "ihqwcrb4cv8a8dqg056pqjye");
        assertConvertible("☃-⌘", "--dqo34k");
        assertConvertible("-> $1.00 <-", "-> $1.00 <--");
        assertThrown(punyDecode("aaa-*"));
        assertThrown(punyDecode("aaa-p73grhua1i6jv5dd"));
    });
}


// Regression test for the overflow guards: both products below exceed
// uint.max and must be rejected instead of silently wrapping.
@safe
unittest
{
    // The ninth digit has weight 1_225_000_000; 4 * weight does not fit.
    assertThrown(punyDecode("bb000000e"d));

    // (0x10FFFF - 0x80) * 5001 does not fit into a uint.
    dstring big;
    foreach (_; 0 .. 5000) big ~= 'a';
    big ~= cast(dchar) 0x10FFFF;
    assertThrown(punyEncode(big));
}


private:


// Algorithm parameters.
enum base = 36;
enum initialN = 0x80;
enum initialBias = 72;
enum tmin = 1;
enum tmax = 26;
enum damp = 700;
enum skew = 38;

/*
Computes the bias to use for the next variable-length integer.

Params:
    delta     = the delta that was just encoded or decoded
    numpoints = number of code points handled so far, including the current one
    firsttime = true for the first delta of a string; it is then scaled by
                `damp` instead of being halved
*/
uint adaptBias(uint delta, uint numpoints, bool firsttime) pure @safe nothrow /+@nogc+/
{
    delta = firsttime ? delta / damp : delta / 2;
    delta += delta / numpoints;

    uint k;
    while (delta > ((base - tmin) * tmax) / 2)
    {
        delta /= base - tmin;
        k += base;
    }

    return k + (base - tmin + 1) * delta / (delta + skew);
}

// Runs `f` both at compile time (CTFE) and at run time.
version (unittest) void assertCTFEable(alias f)()
{
    static assert({ f(); return true; }());
    f();
}