DomainPart.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. <?php
  2. namespace Egulias\EmailValidator\Parser;
  3. use Egulias\EmailValidator\EmailLexer;
  4. use Egulias\EmailValidator\Exception\CharNotAllowed;
  5. use Egulias\EmailValidator\Exception\CommaInDomain;
  6. use Egulias\EmailValidator\Exception\ConsecutiveAt;
  7. use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
  8. use Egulias\EmailValidator\Exception\CRNoLF;
  9. use Egulias\EmailValidator\Exception\DomainHyphened;
  10. use Egulias\EmailValidator\Exception\DotAtEnd;
  11. use Egulias\EmailValidator\Exception\DotAtStart;
  12. use Egulias\EmailValidator\Exception\ExpectingATEXT;
  13. use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
  14. use Egulias\EmailValidator\Exception\ExpectingDTEXT;
  15. use Egulias\EmailValidator\Exception\NoDomainPart;
  16. use Egulias\EmailValidator\Exception\UnopenedComment;
  17. use Egulias\EmailValidator\Warning\AddressLiteral;
  18. use Egulias\EmailValidator\Warning\CFWSWithFWS;
  19. use Egulias\EmailValidator\Warning\DeprecatedComment;
  20. use Egulias\EmailValidator\Warning\DomainLiteral;
  21. use Egulias\EmailValidator\Warning\DomainTooLong;
  22. use Egulias\EmailValidator\Warning\IPV6BadChar;
  23. use Egulias\EmailValidator\Warning\IPV6ColonEnd;
  24. use Egulias\EmailValidator\Warning\IPV6ColonStart;
  25. use Egulias\EmailValidator\Warning\IPV6Deprecated;
  26. use Egulias\EmailValidator\Warning\IPV6DoubleColon;
  27. use Egulias\EmailValidator\Warning\IPV6GroupCount;
  28. use Egulias\EmailValidator\Warning\IPV6MaxGroups;
  29. use Egulias\EmailValidator\Warning\LabelTooLong;
  30. use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
  31. use Egulias\EmailValidator\Warning\TLD;
  32. class DomainPart extends Parser
  33. {
  34. const DOMAIN_MAX_LENGTH = 254;
  35. const LABEL_MAX_LENGTH = 63;
  36. /**
  37. * @var string
  38. */
  39. protected $domainPart = '';
  /**
   * Parses the domain part from the lexer's token stream and stores the result.
   *
   * Advances the lexer by one token, runs the start-of-domain checks, consumes
   * the domain tokens and then validates the token the lexer reports as
   * previous, plus the overall byte length of the parsed domain.
   *
   * @param string $domainPart Not read by this method; tokens are taken from the lexer.
   *
   * @throws DotAtEnd       When the previous token after parsing is a dot.
   * @throws DomainHyphened When the previous token after parsing is a hyphen.
   * @throws CRLFAtTheEnd   When the previous token after parsing is a carriage return.
   */
  public function parse($domainPart)
  {
      $this->lexer->moveNext();
      $this->performDomainStartChecks();

      $parsedDomain = $this->doParseDomainPart();
      $lastToken = $this->lexer->getPrevious();
      $byteLength = strlen($parsedDomain);
      $lastType = $lastToken['type'];

      if (EmailLexer::S_DOT === $lastType) {
          throw new DotAtEnd();
      }

      if (EmailLexer::S_HYPHEN === $lastType) {
          throw new DomainHyphened();
      }

      // Over-long domains are only recorded as a warning, not rejected.
      if ($byteLength > self::DOMAIN_MAX_LENGTH) {
          $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
      }

      if (EmailLexer::S_CR === $lastType) {
          throw new CRLFAtTheEnd();
      }

      $this->domainPart = $parsedDomain;
  }
  /**
   * Runs the checks that apply to the first token of the domain.
   *
   * Rejects invalid leading tokens and an empty domain; when the current
   * token opens a parenthesis, records a DeprecatedComment warning and
   * parses the leading comment.
   */
  private function performDomainStartChecks()
  {
      $this->checkInvalidTokensAfterAT();
      $this->checkEmptyDomain();

      if (EmailLexer::S_OPENPARENTHESIS !== $this->lexer->token['type']) {
          return;
      }

      $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
      $this->parseDomainComments();
  }
  /**
   * Rejects an address whose domain is missing.
   *
   * The domain counts as missing when the current token is S_EMPTY, or when
   * it is a space that is not followed by a GENERIC token.
   *
   * @throws NoDomainPart When no domain is present.
   */
  private function checkEmptyDomain()
  {
      $currentType = $this->lexer->token['type'];

      if (EmailLexer::S_EMPTY === $currentType) {
          throw new NoDomainPart();
      }

      if (EmailLexer::S_SP === $currentType && !$this->lexer->isNextToken(EmailLexer::GENERIC)) {
          throw new NoDomainPart();
      }
  }
  /**
   * Rejects a domain whose first token is a dot or a hyphen.
   *
   * @throws DotAtStart     When the current token is a dot.
   * @throws DomainHyphened When the current token is a hyphen.
   */
  private function checkInvalidTokensAfterAT()
  {
      $leadingType = $this->lexer->token['type'];

      if (EmailLexer::S_DOT === $leadingType) {
          throw new DotAtStart();
      }

      if (EmailLexer::S_HYPHEN === $leadingType) {
          throw new DomainHyphened();
      }
  }
  /**
   * Returns the domain stored by the last successful parse() call.
   *
   * @return string The stored domain; an empty string until parse() has completed.
   */
  public function getDomainPart()
  {
      return $this->domainPart;
  }
  /**
   * Records warnings about the IPv6 portion of an address literal.
   *
   * The first five characters of the literal are skipped before the address
   * is split on ':' and inspected. Nothing is thrown; every finding is stored
   * in $this->warnings.
   *
   * @param string $addressLiteral Literal whose first five characters precede the IPv6 address.
   * @param int    $maxGroups      Number of groups expected when no '::' is present.
   */
  public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
  {
      $previousToken = $this->lexer->getPrevious();
      if (EmailLexer::S_COLON === $previousToken['type']) {
          $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
      }

      $address = substr($addressLiteral, 5);

      // Daniel Marschall's new IPv6 testing strategy
      $groups = explode(':', $address);
      $groupCount = count($groups);
      $firstDoubleColon = strpos($address, '::');

      // Every group must consist of zero to four hexadecimal digits.
      $malformedGroups = preg_grep('/^[0-9A-Fa-f]{0,4}$/', $groups, PREG_GREP_INVERT);
      if (0 !== count($malformedGroups)) {
          $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
      }

      if (false === $firstDoubleColon) {
          // Without '::' the group count has to match exactly.
          if ($maxGroups !== $groupCount) {
              $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
          }

          return;
      }

      // A second, distinct '::' makes the first and last occurrence differ.
      if (strrpos($address, '::') !== $firstDoubleColon) {
          $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();

          return;
      }

      $groupLimit = $maxGroups;
      $leadingDoubleColon = 0 === $firstDoubleColon;
      $trailingDoubleColon = (strlen($address) - 2) === $firstDoubleColon;
      if ($leadingDoubleColon || $trailingDoubleColon) {
          // RFC 4291 allows :: at the start or end of an address
          // with 7 other groups in addition
          ++$groupLimit;
      }

      if ($groupCount > $groupLimit) {
          $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
      } elseif ($groupCount === $groupLimit) {
          $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
      }
  }
  /**
   * Consumes lexer tokens until the current token type is null and builds the domain string.
   *
   * For each token it validates the character, handles parenthesised
   * comments, runs the dot and domain-exception checks, parses a domain
   * literal when brackets are present, tracks the current label for length
   * checking and handles folding white space. The lexer is re-read after
   * every helper call because the helpers may advance it.
   *
   * @return string Concatenation of the values of the consumed tokens.
   *
   * @throws UnopenedComment When a closing parenthesis appears while no opened one is pending.
   * @throws CharNotAllowed  When the token following a consumed one is a space.
   */
  protected function doParseDomainPart()
  {
      $parsedDomain = '';
      $currentLabel = '';
      $pendingParenthesis = 0;

      do {
          $previousToken = $this->lexer->getPrevious();

          $this->checkNotAllowedChars($this->lexer->token);

          if (EmailLexer::S_OPENPARENTHESIS === $this->lexer->token['type']) {
              $this->parseComments();
              $pendingParenthesis += $this->getOpenedParenthesis();
              $this->lexer->moveNext();

              // If the token just stepped over closed the comment, it is already balanced.
              $steppedOver = $this->lexer->getPrevious();
              if (EmailLexer::S_CLOSEPARENTHESIS === $steppedOver['type']) {
                  --$pendingParenthesis;
              }
          }

          if (EmailLexer::S_CLOSEPARENTHESIS === $this->lexer->token['type']) {
              if (0 === $pendingParenthesis) {
                  throw new UnopenedComment();
              }

              --$pendingParenthesis;
          }

          $this->checkConsecutiveDots();
          $this->checkDomainPartExceptions($previousToken);

          if ($this->hasBrackets()) {
              $this->parseDomainLiteral();
          }

          // A dot terminates the current label; anything else extends it.
          if (EmailLexer::S_DOT !== $this->lexer->token['type']) {
              $currentLabel .= $this->lexer->token['value'];
          } else {
              $this->checkLabelLength($currentLabel);
              $currentLabel = '';
          }

          if ($this->isFWS()) {
              $this->parseFWS();
          }

          $parsedDomain .= $this->lexer->token['value'];
          $this->lexer->moveNext();

          if (EmailLexer::S_SP === $this->lexer->token['type']) {
              throw new CharNotAllowed();
          }
      } while (null !== $this->lexer->token['type']);

      // The final label is not followed by a dot, so check it here.
      $this->checkLabelLength($currentLabel);

      return $parsedDomain;
  }
  /**
   * Rejects backslash and slash tokens.
   *
   * @param array $token Lexer token; only its 'type' entry is read.
   *
   * @throws CharNotAllowed When the token type is S_BACKSLASH or S_SLASH.
   */
  private function checkNotAllowedChars(array $token)
  {
      $forbiddenTypes = array(
          EmailLexer::S_BACKSLASH => true,
          EmailLexer::S_SLASH => true,
      );

      if (!isset($forbiddenTypes[$token['type']])) {
          return;
      }

      throw new CharNotAllowed();
  }
  /**
   * Parses a bracketed domain literal, first warning about a leading colon.
   *
   * An IPV6ColonStart warning is recorded when the next token is a colon, or
   * when the next token is the IPv6 tag and the token after it is a double
   * colon. The second case is inspected on a clone so the real lexer is not
   * advanced.
   *
   * @return string|false Whatever doParseDomainLiteral() returns.
   */
  protected function parseDomainLiteral()
  {
      $startsWithColon = $this->lexer->isNextToken(EmailLexer::S_COLON);

      if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
          $lookahead = clone $this->lexer;
          $lookahead->moveNext();
          $startsWithColon = $startsWithColon || $lookahead->isNextToken(EmailLexer::S_DOUBLECOLON);
      }

      if ($startsWithColon) {
          $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
      }

      return $this->doParseDomainLiteral();
  }
  /**
   * Consumes the tokens of a domain literal and classifies the collected text.
   *
   * Token values are accumulated until the lexer runs out of tokens or an
   * S_CLOSEQBRACKET token is reached. The text is then stripped of '[',
   * passed through checkIPV4Tag() and, when the IPv6 tag was seen, through
   * checkIPV6Tag().
   *
   * @return string|false False when checkIPV4Tag() returns false; otherwise
   *                      the processed literal text.
   *
   * @throws ExpectingDTEXT On a NUL token, or when the next token opens another bracket.
   * @throws CRNoLF         When the next token is a bare carriage return.
   */
  protected function doParseDomainLiteral()
  {
      $IPv6TAG = false;
      $addressLiteral = '';

      do {
          if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
              throw new ExpectingDTEXT();
          }

          if ($this->lexer->token['type'] === EmailLexer::INVALID ||
              $this->lexer->token['type'] === EmailLexer::C_DEL ||
              $this->lexer->token['type'] === EmailLexer::S_LF
          ) {
              $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
          }

          if ($this->lexer->isNextTokenAny(array(EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET))) {
              throw new ExpectingDTEXT();
          }

          // The list must hold token types only. The previous code put the
          // boolean result of a comparison against EmailLexer::CRLF in the
          // third slot, so a following CRLF was never treated as FWS.
          if ($this->lexer->isNextTokenAny(
              array(EmailLexer::S_HTAB, EmailLexer::S_SP, EmailLexer::CRLF)
          )) {
              $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
              $this->parseFWS();
          }

          if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
              throw new CRNoLF();
          }

          if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
              // Keep the backslash in the literal, then validate the escaped token.
              $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
              $addressLiteral .= $this->lexer->token['value'];
              $this->lexer->moveNext();
              $this->validateQuotedPair();
          }

          if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
              $IPv6TAG = true;
          }

          // NOTE(review): hasBrackets() searches for S_CLOSEBRACKET while this
          // loop stops on S_CLOSEQBRACKET - confirm which token is intended.
          if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
              break;
          }

          $addressLiteral .= $this->lexer->token['value'];
      } while ($this->lexer->moveNext());

      $addressLiteral = str_replace('[', '', $addressLiteral);
      $addressLiteral = $this->checkIPV4Tag($addressLiteral);

      if (false === $addressLiteral) {
          return $addressLiteral;
      }

      if (!$IPv6TAG) {
          $this->warnings[DomainLiteral::CODE] = new DomainLiteral();

          return $addressLiteral;
      }

      $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
      $this->checkIPV6Tag($addressLiteral);

      return $addressLiteral;
  }
  /**
   * Detects a dotted-quad IPv4 address at the end of an address literal.
   *
   * @param string $addressLiteral Literal text to inspect.
   *
   * @return string|false False (with an AddressLiteral warning recorded) when
   *                      the IPv4 match starts at offset 0; the literal with
   *                      the IPv4 part replaced by '0:0' when it starts
   *                      later; the unchanged literal when nothing matches.
   */
  protected function checkIPV4Tag($addressLiteral)
  {
      $ipv4Match = array();

      // Extract IPv4 part from the end of the address-literal (if there is one)
      $matchCount = preg_match(
          '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
          $addressLiteral,
          $ipv4Match
      );

      if (!($matchCount > 0)) {
          return $addressLiteral;
      }

      $offset = strrpos($addressLiteral, $ipv4Match[0]);

      if (0 === $offset) {
          $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

          return false;
      }

      // Convert IPv4 part to IPv6 format for further testing
      return substr($addressLiteral, 0, (int) $offset) . '0:0';
  }
  /**
   * Rejects tokens that may not appear at the current position of a domain.
   *
   * @param array $prev Token preceding the current one; only its 'type' entry is read.
   *
   * @throws ExpectingATEXT On quote, backtick, semicolon or angle-bracket tokens, on an
   *                        S_OPENQBRACKET not preceded by '@', and on a backslash
   *                        followed by a GENERIC token.
   * @throws CommaInDomain  When the current token is a comma.
   * @throws ConsecutiveAt  When the current token is an '@'.
   * @throws DomainHyphened When a hyphen is immediately followed by a dot.
   */
  protected function checkDomainPartExceptions(array $prev)
  {
      $lexer = $this->lexer;
      $currentType = $lexer->token['type'];

      $neverAllowed = array_fill_keys(
          array(
              EmailLexer::S_DQUOTE,
              EmailLexer::S_SQUOTE,
              EmailLexer::S_BACKTICK,
              EmailLexer::S_SEMICOLON,
              EmailLexer::S_GREATERTHAN,
              EmailLexer::S_LOWERTHAN,
          ),
          true
      );

      if (isset($neverAllowed[$currentType])) {
          throw new ExpectingATEXT();
      }

      if (EmailLexer::S_COMMA === $currentType) {
          throw new CommaInDomain();
      }

      if (EmailLexer::S_AT === $currentType) {
          throw new ConsecutiveAt();
      }

      if (EmailLexer::S_OPENQBRACKET === $currentType && EmailLexer::S_AT !== $prev['type']) {
          throw new ExpectingATEXT();
      }

      if (EmailLexer::S_HYPHEN === $currentType && $lexer->isNextToken(EmailLexer::S_DOT)) {
          throw new DomainHyphened();
      }

      if (EmailLexer::S_BACKSLASH === $currentType && $lexer->isNextToken(EmailLexer::GENERIC)) {
          throw new ExpectingATEXT();
      }
  }
  /**
   * Tells whether the current token opens a bracketed domain literal.
   *
   * @return bool True when the current token is S_OPENBRACKET and a closing
   *              bracket can be found; false when the current token is not
   *              an opening bracket.
   *
   * @throws ExpectingDomainLiteralClose When the lexer's search for the closing bracket fails.
   */
  protected function hasBrackets()
  {
      if (EmailLexer::S_OPENBRACKET === $this->lexer->token['type']) {
          try {
              $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
          } catch (\RuntimeException $e) {
              throw new ExpectingDomainLiteralClose();
          }

          return true;
      }

      return false;
  }
  /**
   * Records a LabelTooLong warning when the given label exceeds the allowed length.
   *
   * @param string $label Domain label to measure.
   */
  protected function checkLabelLength($label)
  {
      if (!$this->isLabelTooLong($label)) {
          return;
      }

      $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
  }
  /**
   * Decides whether a domain label is longer than permitted.
   *
   * Labels made only of bytes 0x00-0x7F are measured with strlen() against
   * LABEL_MAX_LENGTH. Any other label is run through idn_to_ascii() and the
   * IDNA_ERROR_LABEL_TOO_LONG flag of the reported errors decides.
   *
   * @param string $label Domain label to measure.
   *
   * @return bool True when the label is too long.
   */
  private function isLabelTooLong($label)
  {
      $containsNonAscii = preg_match('/[^\x00-\x7F]/', $label);

      if (!$containsNonAscii) {
          return strlen($label) > self::LABEL_MAX_LENGTH;
      }

      // Only the error flags are of interest; the converted value is discarded.
      idn_to_ascii($label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46, $idnaInfo);
      $tooLongFlag = $idnaInfo['errors'] & IDNA_ERROR_LABEL_TOO_LONG;

      return (bool) $tooLongFlag;
  }
  /**
   * Skips over a comment that appears at the start of the domain.
   *
   * Advances the lexer until the next token is the closing parenthesis,
   * calling warnEscaping() before each step, then moves onto the closing
   * parenthesis itself.
   *
   * @throws ExpectingATEXT When the token following the closing parenthesis is a dot.
   */
  protected function parseDomainComments()
  {
      $this->isUnclosedComment();

      for (; !$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS); $this->lexer->moveNext()) {
          $this->warnEscaping();
      }

      $this->lexer->moveNext();

      $dotFollowsComment = $this->lexer->isNextToken(EmailLexer::S_DOT);
      if ($dotFollowsComment) {
          throw new ExpectingATEXT();
      }
  }
  /**
   * Records a TLD warning when a DomainLiteral warning has already been recorded.
   *
   * The lookup is guarded with empty() so that a missing DomainLiteral entry
   * no longer triggers an undefined-index notice; the truthiness test on an
   * existing entry is unchanged.
   */
  protected function addTLDWarnings()
  {
      if (!empty($this->warnings[DomainLiteral::CODE])) {
          $this->warnings[TLD::CODE] = new TLD();
      }
  }
  374. }