/**
Punycode converter. This module is based on the original implementation in
RFC 3492, and the JavaScript implementation by Mathias Bynens.

License: Boost Software License 1.0
*/
module punycode;


import std.algorithm;
import std.array;
import std.ascii;
import std.conv;
import std.exception;
import std.traits;


/**
Converts a UTF string to a Punycode string.

The ASCII code points of the input are copied to the output first, followed by
a `'-'` delimiter (only when at least one ASCII code point was copied). The
non-ASCII code points are then appended as variable-length base-36 integers.

Params:
    str = The string to encode. Any string type (`string`, `wstring`,
          `dstring`) is accepted.

Returns:
    The Punycode form of `str`, as the same string type as the input.

Throws:
    PunycodeException if an internal error occurred (arithmetic overflow).

Standards:
    $(LINK2 https://www.ietf.org/rfc/rfc3492.txt, RFC 3492)
*/
S punyEncode(S)(S str)
    if (isSomeString!S)
{
    // Maps a digit value in [0, 35] to 'a'..'z' (0..25) or '0'..'9' (26..35).
    static char encodeDigit(uint x)
    {
        if (x <= 25) return cast(char)('a' + x);
        else if (x <= 35) return cast(char)('0' + x - 26);
        assert(0);
    }

    auto dstr = str.to!dstring;
    // All counters below are uint; this also keeps `handledLength + 1` non-zero.
    assert(dstr.length <= uint.max);

    // Copy the basic (ASCII) code points verbatim.
    auto ret = appender!S;
    ret ~= dstr.filter!isASCII;
    assert(ret.data.length <= uint.max);

    auto handledLength = cast(uint)ret.data.length;
    immutable basicLength = handledLength;

    if (handledLength > 0) ret ~= '-';

    // Nothing but ASCII: done.
    if (handledLength == dstr.length) return ret.data;

    // Sorted non-ASCII code points; lets us find the next code point >= n
    // without rescanning the whole input.
    import std.functional : not;
    auto ms = (() @trusted => (cast(uint[])(dstr.filter!(not!isASCII).array)).sort!"a < b")();

    dchar n = initialN;
    uint delta = 0;
    uint bias = initialBias;
    while (handledLength < dstr.length)
    {
        // m = smallest not-yet-handled code point (>= n).
        dchar m = void;
        while ((m = ms.front) < n) ms.popFront();

        // Overflow guard in division form: the product (m - n) * (handledLength + 1)
        // must never be computed before it is known to fit, otherwise it can wrap
        // around and slip past the check.
        enforce!PunycodeException((m - n) <= (uint.max - delta) / (handledLength + 1), "Arithmetic overflow");
        delta += (m - n) * (handledLength + 1);

        n = m;

        foreach (immutable(dchar) c; dstr)
        {
            if (c < n)
            {
                enforce!PunycodeException(delta != uint.max, "Arithmetic overflow");
                delta++;
            }
            else if (c == n)
            {
                // Emit delta as a generalized variable-length integer.
                auto q = delta;

                for (auto k = base;;k += base)
                {
                    immutable t = k <= bias ? tmin :
                                  k >= bias + tmax ? tmax : k - bias;

                    if (q < t) break;

                    ret ~= encodeDigit(t + (q - t) % (base - t));
                    q = (q - t) / (base - t);
                }

                ret ~= encodeDigit(q);

                bias = adaptBias(delta, cast(uint)handledLength + 1, handledLength == basicLength);
                delta = 0;
                handledLength++;
            }
        }
        delta++;
        n++;
    }

    return ret.data;
}

///
@safe pure
unittest
{
    assert(punyEncode("mañana") == "maana-pta");
}


/**
Converts a Punycode string to a UTF string.

Params:
    str = The Punycode string to decode. Any string type (`string`,
          `wstring`, `dstring`) is accepted.

Returns:
    The decoded string, as the same string type as the input.

Throws:
    PunycodeException if an internal error occurred (arithmetic overflow).

    InvalidPunycodeException if an invalid Punycode string was passed.

Standards:
    $(LINK2 https://www.ietf.org/rfc/rfc3492.txt, RFC 3492)
*/
S punyDecode(S)(in S str)
    if (isSomeString!S)
{
    // Maps 'A'..'Z' / 'a'..'z' to 0..25 and '0'..'9' to 26..35.
    static uint decodeDigit(dchar c)
    {
        if (c.isUpper) return c - 'A';
        if (c.isLower) return c - 'a';
        if (c.isDigit) return c - '0' + 26;
        throw new InvalidPunycodeException("Invalid Punycode");
    }

    auto dstr = str.to!dstring;
    assert(dstr.length <= uint.max);

    dchar[] ret;

    dchar n = initialN;
    uint i = 0;
    uint bias = initialBias;

    import std.string : lastIndexOf;
    import std.utf : isValidDchar;

    // Everything before the last delimiter is the literal (ASCII-only) part.
    immutable delimIdx = dstr.lastIndexOf('-');
    if (delimIdx != -1)
    {
        enforce!InvalidPunycodeException(dstr[0 .. delimIdx].all!isASCII, "Invalid Punycode");
        ret = dstr[0 .. delimIdx].dup;
    }

    // Start decoding right after the delimiter, or at the beginning when
    // there is no literal part.
    auto idx = (delimIdx == -1 || delimIdx == 0) ? 0 : delimIdx + 1;

    while (idx < dstr.length)
    {
        immutable oldi = i;
        uint w = 1;

        // Decode one generalized variable-length integer into i.
        for (auto k = base;;k += base)
        {
            // Input ended in the middle of a variable-length integer.
            enforce!InvalidPunycodeException(idx < dstr.length, "Invalid Punycode");

            immutable digit = decodeDigit(dstr[idx]);
            idx++;

            // Overflow guard in division form: digit * w itself may wrap
            // around, so it must not be computed before the check (w >= 1).
            enforce!PunycodeException(digit <= (uint.max - i) / w, "Arithmetic overflow");
            i += digit * w;

            immutable t = k <= bias ? tmin :
                          k >= bias + tmax ? tmax : k - bias;
            if (digit < t) break;

            enforce!PunycodeException(w <= uint.max / (base - t), "Arithmetic overflow");
            w *= base - t;
        }

        enforce!PunycodeException(ret.length < uint.max-1, "Arithmetic overflow");

        bias = adaptBias(i - oldi, cast(uint) ret.length + 1, oldi == 0);

        // i wraps around the output length; each wrap advances n by one.
        enforce!PunycodeException(i / (ret.length + 1) <= uint.max - n, "Arithmetic overflow");
        n += i / (ret.length + 1);

        // A well-formed Punycode string never yields a surrogate or a value
        // above U+10FFFF; reject it here instead of building an invalid string.
        enforce!InvalidPunycodeException(isValidDchar(n), "Invalid Punycode");

        i %= ret.length + 1;

        (() @trusted => ret.insertInPlace(i, n))();

        i++;
    }

    return ret.to!S;
}

///
@safe pure
unittest
{
    assert(punyDecode("maana-pta") == "mañana");
}


@safe pure
unittest
{
    static void assertConvertible(S)(S plain, S punycode)
    {
        assert(punyEncode(plain) == punycode);
        assert(punyDecode(punycode) == plain);
    }

    assertCTFEable!({
        assertConvertible("", "");
        assertConvertible("ASCII0123", "ASCII0123-");
        assertConvertible("Punycodeぴゅにこーど", "Punycode-p73grhua1i6jv5d");
        assertConvertible("Punycodeぴゅにこーど"w, "Punycode-p73grhua1i6jv5d"w);
        assertConvertible("Punycodeぴゅにこーど"d, "Punycode-p73grhua1i6jv5d"d);
        assertConvertible("ぴゅにこーど", "28j1be9azfq9a");
        assertConvertible("他们为什么不说中文", "ihqwcrb4cv8a8dqg056pqjye");
        assertConvertible("☃-⌘", "--dqo34k");
        assertConvertible("-> $1.00 <-", "-> $1.00 <--");
        assertThrown!InvalidPunycodeException(punyDecode("aaa-*"));
        assertThrown!InvalidPunycodeException(punyDecode("aaa-p73grhua1i6jv5dd"));
        assertThrown!InvalidPunycodeException(punyDecode("ü-"));
        assert(collectExceptionMsg(punyDecode("aaa-99999999")) == "Arithmetic overflow");
    });
}


// Regression tests for the overflow guards and code point validation.
@safe pure
unittest
{
    // (m - n) * (handledLength + 1) exceeds uint.max; must be detected, not wrapped.
    assertThrown!PunycodeException(punyEncode(replicate("a", 4096) ~ "\U0010FFFF"));

    // digit * w exceeds uint.max on the ninth digit; must be detected, not wrapped.
    assertThrown!PunycodeException(punyDecode("aaa-00000000e"));

    // Decodes to the surrogate U+D800, which is not a valid code point.
    assertThrown!InvalidPunycodeException(punyDecode("ib9b"));
}


/**
Exception thrown by punycode module.
*/
class PunycodeException : Exception
{
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        @safe pure nothrow
    {
        super(msg, file, line, next);
    }
}


/// ditto
class InvalidPunycodeException : PunycodeException
{
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        @safe pure nothrow
    {
        super(msg, file, line, next);
    }
}


private:


// Bootstring parameter values for Punycode (RFC 3492, section 5).
enum base = 36;
enum initialN = 0x80;
enum initialBias = 72;
enum tmin = 1;
enum tmax = 26;
enum damp = 700;
enum skew = 38;


// Bias adaptation function (RFC 3492, section 6.1).
//
// delta     = the delta that was just encoded/decoded
// numpoints = number of code points handled so far, including the current one
// firsttime = true for the very first delta, which is damped more strongly
//
// Returns the bias to use for the next delta.
uint adaptBias(uint delta, uint numpoints, bool firsttime) @safe pure nothrow /+@nogc+/
{
    delta = firsttime ? delta / damp : delta / 2;
    delta += delta / numpoints;

    uint k;
    while (delta > ((base - tmin) * tmax) / 2)
    {
        delta /= base - tmin;
        k += base;
    }

    return k + (base - tmin + 1) * delta / (delta + skew);
}

// Runs f both at compile time (CTFE) and at run time.
version (unittest) void assertCTFEable(alias f)()
{
    static assert({ f(); return true; }());
    f();
}