<?php
namespace Newland\Toubiz\Api\Service;

/**
 * Removes empty top-level nodes and surrounding whitespace from HTML fragments.
 */
class StringCleaner
{
    /**
     * Characters stripped from both ends of a string by cleanWhiteSpace().
     * "\xA0" is the non-breaking space as a single (Latin-1) byte; for UTF-8
     * input it is matched as the code point U+00A0 instead.
     *
     * @var string[]
     */
    protected $whiteSpaceCharacters = [' ', "\t", "\n", "\r", "\0", "\x0B", "\xA0"];

    /**
     * Encoded whitespace that counts as blank when testing whether text is empty.
     *
     * NOTE(review): '&#a0' is not a well-formed character reference (the hex form
     * would be '&#xA0;'); it is kept verbatim for backward compatibility.
     *
     * @var string[]
     */
    protected $encodedWhiteSpace = [ '&nbsp;', '&#a0' ];


    /**
     * Cleans an HTML fragment.
     *
     * The fragment is parsed as XML. Top-level non-text nodes whose text content is
     * blank are dropped (this includes childless elements such as <br/>), as are
     * blank text nodes in first or last position. The remaining nodes are serialised
     * again, surrounding whitespace is trimmed and all newlines are removed.
     *
     * If the fragment cannot be parsed as XML it is not filtered; only the
     * whitespace cleanup is applied to the original input.
     *
     * @param string $input
     * @return string
     */
    public function cleanHtmlString(string $input): string
    {
        $elements = $this->parseStringAsXmlElements($input);
        if ($elements === null) {
            // Malformed markup: keep the content instead of failing or dropping it.
            return $this->cleanWhiteSpace($input);
        }

        $lastIndex = \count($elements) - 1;
        $cleanedHtml = '';
        foreach ($elements as $index => $child) {
            /** @var \DOMNode $child */
            $isFirstOrLast = $index === 0 || $index === $lastIndex;

            if (!$this->shouldElementBeDisregarded($child, $isFirstOrLast)) {
                $cleanedHtml .= (string) $child->ownerDocument->saveHTML($child);
            }
        }

        return $this->cleanWhiteSpace($cleanedHtml);
    }


    /**
     * Parses the given fragment as XML and returns its top-level nodes.
     *
     * HTML entities are decoded first and every remaining ampersand is re-escaped,
     * so the XML parser never sees an entity it does not know.
     *
     * @param string $xmlString
     * @return \DOMNode[]|null Zero-indexed list of nodes, or null if the fragment is not well-formed.
     */
    private function parseStringAsXmlElements(string $xmlString)
    {
        $input = html_entity_decode($xmlString);
        $input = str_replace('&', '&amp;', $input);

        // Collect parser errors internally instead of emitting PHP warnings,
        // then restore whatever setting the caller had.
        $previousSetting = libxml_use_internal_errors(true);
        $xml = new \DOMDocument();
        $loaded = $xml->loadXML('<content id="root">' . $input . '</content>');
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if ($loaded === false || $xml->documentElement === null) {
            return null;
        }

        // A plain array is countable on every PHP version (DOMNodeList only since 7.2).
        return iterator_to_array($xml->documentElement->childNodes, false);
    }


    /**
     * Decides whether a top-level node should be left out of the result.
     *
     * Non-text nodes are dropped when their text content is blank. Text nodes are
     * only dropped when blank and located at the first or last position.
     *
     * @param \DOMNode $element
     * @param bool $isFirstOrLast
     * @return bool
     */
    private function shouldElementBeDisregarded(\DOMNode $element, bool $isFirstOrLast): bool
    {
        $isText = $element->nodeName === '#text';
        if ($isText && !$isFirstOrLast) {
            return false;
        }

        return $this->isStringEmpty($element->textContent);
    }

    /**
     * Checks whether a string consists solely of (possibly encoded) whitespace.
     *
     * @param string $string
     * @return bool
     */
    private function isStringEmpty(string $string): bool
    {
        // Encoding first turns non-breaking spaces into '&nbsp;' so they can be replaced below.
        $string = htmlentities($string);
        $string = $this->replaceEncodedWhiteSpaceWithActualSpaces($string);
        return $this->cleanWhiteSpace($string) === '';
    }

    /**
     * Replaces every occurrence of an encoded whitespace sequence (case-insensitive)
     * with a regular space.
     *
     * @param string $input
     * @return string
     */
    private function replaceEncodedWhiteSpaceWithActualSpaces(string $input): string
    {
        if (\count($this->encodedWhiteSpace) === 0) {
            // An empty alternation would match at every position and inject spaces.
            return $input;
        }

        $parts = array_map(
            function (string $part) {
                return preg_quote($part, '/');
            },
            $this->encodedWhiteSpace
        );
        $regex = '/(' . implode('|', $parts) . ')/i';
        return (string) preg_replace($regex, ' ', $input);
    }

    /**
     * Trims the configured whitespace characters from both ends of the input and
     * removes all newline characters from the result.
     *
     * Valid UTF-8 input is trimmed per code point; a byte-wise trim would cut
     * multi-byte characters ending in 0xA0 (e.g. "à") in half. Any other input
     * is trimmed byte-wise.
     *
     * @param string $input
     * @return string
     */
    private function cleanWhiteSpace(string $input): string
    {
        if (preg_match('//u', $input) === 1) {
            $input = $this->trimUtf8WhiteSpace($input);
        } else {
            $input = trim($input, implode('', $this->whiteSpaceCharacters));
        }

        return str_replace("\n", '', $input);
    }

    /**
     * Trims the configured whitespace characters from both ends of a valid UTF-8 string.
     *
     * @param string $input
     * @return string
     */
    private function trimUtf8WhiteSpace(string $input): string
    {
        $characterClass = '';
        foreach ($this->whiteSpaceCharacters as $character) {
            if (preg_match('//u', $character) === 1) {
                $characterClass .= preg_quote($character, '/');
            } elseif (\strlen($character) === 1) {
                // A lone high byte such as "\xA0" is not valid UTF-8 on its own;
                // match the code point with the same number instead.
                $characterClass .= sprintf('\\x{%X}', \ord($character));
            }
        }

        if ($characterClass === '') {
            return $input;
        }

        $regex = '/\A[' . $characterClass . ']+|[' . $characterClass . ']+\z/u';
        $trimmed = preg_replace($regex, '', $input);
        return $trimmed === null ? $input : $trimmed;
    }
}
