#!/usr/bin/env python3
"""
sligro_xml_to_json.py
=====================
Converteert het Sligro articles.xml naar een geoptimaliseerd JSON-bestand
dat FileMaker via Insert from URL kan inladen — zonder BE_XPath overhead.

Deployment: /var/www/html/sligro/downloads/sligro_xml_to_json.py
Cron:       0 3 * * * /usr/bin/python3 /var/www/html/sligro/downloads/sligro_xml_to_json.py

Input:      /var/www/html/sligro/downloads/latest.xml      (gedownload door bestaande cron)
Output:     /var/www/html/sligro/downloads/latest.json     (geserveerd via https://ai-pdf.seeyouresto.com/sligro-downloads/latest.json)
Log:        /var/log/sligro_convert.log
"""

import json
import logging
import os
import sys
import time
from datetime import datetime, timezone
from lxml import etree   # sudo apt install python3-lxml  (of: pip3 install lxml)

# ---------------------------------------------------------------------------
# CONFIG
# ---------------------------------------------------------------------------
CUSTOMER_ID = sys.argv[1] if len(sys.argv) > 1 else "948496"
BASE_PATH = f"/var/www/html/sligro/{CUSTOMER_ID}/downloads"
XML_PATH  = f"{BASE_PATH}/latest.xml"
JSON_PATH = f"{BASE_PATH}/latest.json"
LOG_PATH  = f"/var/log/sligro_convert_{CUSTOMER_ID}.log"

# Allergen code mapping: Sligro code → interne SYR waarde-ID
ALLERGEN_MAP = {
    "AW":  "134.1",   # Gluten
    "BM":  "134.10",  # Mosterd
    "AS":  "134.11",  # Sesamzaad
    "AU":  "134.12",  # Sulfieten
    "NL":  "134.13",  # Lupine
    "UM":  "134.14",  # Weekdieren
    "GO":  "134.15",  # Haver
    "UW":  "134.16",  # Tarwe
    "AC":  "134.2",   # Schaaldieren
    "AE":  "134.3",   # Eieren
    "AF":  "134.4",   # Vis
    "AP":  "134.5",   # Aardnoten
    "AY":  "134.6",   # Soja
    "AM":  "134.7",   # Melk
    "AN":  "134.8",   # Noten
    "BC":  "134.9",   # Selderij
}

BTW_MAP = {
    "LOW":    6,
    "MIDDLE": 12,
    "HIGH":   21,
}

CATEGORY_MAP = {
    "AARDAPPELEN BEWERKT":("32.3","36.11"),
    "AARDAPPELEN ONBEWERKT":("32.3","36.11"),
    "AARDAPPELPRODUKTEN DIEPVRIES":("32.4","36.11"),
    "VERSE AARDAPPELPRODUKTEN":("32.3","36.11"),
    "GROENTEN BEWERKT":("32.3","36.11"),
    "GROENTEN ONBEWERKT":("32.3","36.11"),
    "FRUIT BEWERKT":("32.3","36.11"),
    "FRUIT ONBEWERKT":("32.3","36.11"),
    "GROEN&FRUIT DIEPVR. FOODSERVIC":("32.4","36.11"),
    "GROENTECONSERVEN, PEULVRUCHTEN":("32.5","36.11"),
    "TOMATENCONSERVEN":("32.5","36.11"),
    "VRUCHTENCONSERVEN":("32.5","36.11"),
    "ZUIDVRUCHTEN":("32.5","36.11"),
    "BAKE OFF DIEPVRIES":("32.4","36.6"),
    "BROOD GASPACK":("32.3","36.6"),
    "BROODPRODUCTEN DIEPVRIES":("32.4","36.6"),
    "BROODVERVANGERS":("32.5","36.6"),
    "PIZZA DIEPVRIES":("32.4","36.6"),
    "VLEES VERS CONC":("32.3","36.8"),
    "VLEES DIEPVRIES":("32.4","36.8"),
    "VLEES DIEPVRIES SLAGERIJ CONC":("32.4","36.8"),
    "VLEES BEREID CONV 1530 CONC":("32.3","36.8"),
    "VLEES BEREID CONV 2300 CONC":("32.3","36.8"),
    "VLEESCONSERVEN":("32.5","36.8"),
    "WILD VERS NCON":("32.3","36.8"),
    "WILD DIEPVRIES NCON":("32.4","36.8"),
    "VLEESWAREN BULK":("32.3","36.7"),
    "VLEESWAREN VERPAKT":("32.3","36.7"),
    "DROGE WORST":("32.3","36.7"),
    "GEVOGELTE VERS NCON":("32.3","36.17"),
    "GEVOGELTE DIEPVRIES NCON":("32.4","36.17"),
    "VIS VERS":("32.3","36.9"),
    "VIS GEKOELD":("32.3","36.9"),
    "VIS DIEPVRIES":("32.4","36.9"),
    "VISCONSERVEN":("32.5","36.9"),
    "SCHAAL EN SCHELPDIEREN":("32.3","36.10"),
    "KAAS BUITENLAND UITSNIJ":("32.3","36.19"),
    "KAAS BUITENLAND VERPAKT":("32.3","36.19"),
    "KAAS HOLLAND VERS VOORVERPAKT":("32.3","36.19"),
    "ROOMPRODUCTEN":("32.3","36.19"),
    "MELKPRODUKTEN DAGVERS":("32.3","36.19"),
    "EIEREN VERS":("32.3","36.19"),
    "EIERPRODUCTEN GEKOELD":("32.3","36.19"),
    "BOTER":("32.3","36.19"),
    "MARGARINE":("32.3","36.19"),
    "ZUIVEL HOUDBAAR":("32.5","36.19"),
    "KOFFIEMELK & CREAMER":("32.5","36.19"),
    "HORECA DIEPVRIES":("32.4","36.16"),
    "MAALTIJD,SOEP,PASTA DIEPVRIES":("32.4","36.16"),
    "MAALTIJDEN & -COMPONENTEN KOEL":("32.3","36.16"),
    "MINISNACKS BORRELHAPJES":("32.4","36.16"),
    "GEBAK EN PATISSERIE DIEPVRIES":("32.4","36.16"),
    "IJS EN PUDDING":("32.4","36.16"),
    "SALADES":("32.3","36.16"),
    "MEDITERRAANS VERS":("32.3","36.16"),
    "BAKPRODUKTEN":("32.5","36.2"),
    "PASTA EN PASTASAUZEN":("32.5","36.2"),
    "RIJST EN GRANEN":("32.5","36.2"),
    "KRUIDEN EN SPECERIJEN":("32.5","36.2"),
    "AZIATISCHE KEUKEN":("32.5","36.2"),
    "WERELD KEUKEN":("32.5","36.2"),
    "MEXICAANSE KEUKEN":("32.5","36.2"),
    "SOEP DROOG & SMAAKVERSTERKERS":("32.5","36.2"),
    "SOEP BOUILLON NAT":("32.5","36.2"),
    "SOEPBENODIGDHEDEN":("32.5","36.2"),
    "VLEES- VIS EN GROENTESAUZEN":("32.5","36.2"),
    "SNACK- EN TAFELSAUZEN":("32.5","36.2"),
    "AZIJN EN DRESSINGS":("32.5","36.2"),
    "OLIJVEN EN ANTIPASTI":("32.5","36.2"),
    "TAFELZUREN":("32.5","36.2"),
    "OLIEN":("32.5","36.2"),
    "VETTEN":("32.5","36.2"),
    "SUIKER & ZOETSTOFFEN":("32.5","36.2"),
    "BOTERHAMARTIKELEN":("32.5","36.2"),
    "TOAST":("32.5","36.2"),
    "KOEK & BANKET GROOTVERBRUIK":("32.5","36.2"),
    "KOEK & BANKET RETAIL":("32.5","36.2"),
    "CHIPS EN SNACKS":("32.5","36.2"),
    "NOTEN":("32.5","36.2"),
    "BONBONS":("32.5","36.2"),
    "BARS EN TABLETTEN":("32.5","36.2"),
    "KINDERSTUKSARTIKELEN":("32.5","36.2"),
    "WICHTGOED":("32.5","36.2"),
    "SUIKERWERK":("32.5","36.2"),
    "PATISSERIEPRODUKTEN":("32.5","36.2"),
    "IJSBENODIGDHEDEN":("32.5","36.2"),
    "ZOUT":("32.5","36.2"),
    "O'LIFE":("32.5","36.2"),
    "WATERS":("32.1","36.5"),
    "FRISDRANKEN KLEINVERPAKKING":("32.1","36.5"),
    "FRISDRANKEN GROOTVERPAKKING":("32.1","36.5"),
    "SAPPEN & FRUITDRANKEN":("32.1","36.5"),
    "VERSE SAPPEN":("32.1","36.5"),
    "SIROPEN":("32.1","36.5"),
    "THEE":("32.1","36.4"),
    "KOFFIE, CACAO & OPLOSKOFFIE":("32.1","36.4"),
    "WIJNEN":("32.1","36.1"),
    "SPECIAALBIER KLEINVERPAKKING":("32.1","36.3"),
    "GEDISTILLEERD":("32.1","36.3"),
    "GEDISTILLEERD TOT 15%":("32.1","36.3"),
    "PORT SHERRY VERMOUTH APERITIEF":("32.1","36.3"),
    "AFWAS- & VAATMIDDELEN":("32.6","36.15"),
    "REINIGINGSMIDDELEN":("32.6","36.15"),
    "WASMIDDELEN":("32.6","36.15"),
    "SCHOONMAAKARTIKELEN":("32.6","36.15"),
    "TOILET- & KEUKENPAPIER":("32.6","36.15"),
    "PAPIEREN-TAFELBENODIGDHEDEN":("32.6","36.15"),
    "VERPAKKINGSMAT./DISPOS. GROOTV":("32.6","36.15"),
    "AFVALZAKKEN":("32.6","36.15"),
    "KEUKENGEREEDSCHAPPEN":("32.6","36.15"),
    "BUFFET, CHAFERS & GN-BAKKEN":("32.6","36.15"),
    "PANNEN":("32.6","36.15"),
    "HORECA-APPARATUUR":("32.6","36.15"),
    "BEDRIJFSBENODIGDHEDEN":("32.6","36.15"),
    "BEDRIJFSKLEDING":("32.6","36.15"),
    "RESTAURANTBENODIGDHEDEN":("32.6","36.15"),
    "HOUTSKOOL":("32.6","36.15"),
    "KAARSEN EN KANDELAARS":("32.6","36.15"),
    "DECORATIE- EN FEESTARTIKELEN":("32.6","36.15"),
    "COSMETICA":("32.6","36.15"),
    "KANTOOR":("32.6","36.15"),
    "DIER":("32.6","36.15"),
    "ARTIKELEN KERSTPAKKETTEN NONFD":("32.6","36.15"),
    "GLASWERK":("32.6","36.12"),
    "SERVIEZEN":("32.6","36.14"),
    "BESTEKKEN":("32.6","36.13"),
    "DIVERSEN":("",""),
}

# ---------------------------------------------------------------------------
# LOGGING
# ---------------------------------------------------------------------------
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.INFO,
    format="%(asctime)s  %(levelname)s  %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# HELPERS
# ---------------------------------------------------------------------------
NS = "*"  # we gebruiken local-name() matching net zoals het FM-script

def lx(element, tag):
    """Geef eerste child-element terug met gegeven local-name, of None."""
    results = element.xpath(f"*[local-name()='{tag}']")
    return results[0] if results else None

def lxt(element, tag, default=""):
    """Geef text van eerste child met local-name() = tag, of default."""
    el = lx(element, tag)
    return (el.text or "").strip() if el is not None else default

def lxa(element, tag, attr, default=""):
    """Geef attribuut van eerste child met local-name() = tag, of default."""
    el = lx(element, tag)
    return (el.get(attr) or "").strip() if el is not None else default

def lxdesc(element, tag, locale):
    """Geef description[@locale=...] binnen een container-element."""
    container = lx(element, tag)
    if container is None:
        return ""
    results = container.xpath(f"*[local-name()='description'][@locale='{locale}']")
    if results:
        return (results[0].text or "").strip()
    return ""

def get_price(item):
    """
    Zoek invoicePrice voor minimumOrderQuantityScale = 1.
    Fallback: eerste priceTable.
    """
    # Probeer priceTables > priceTable
    for tables_tag in ["priceTables", "priceTable"]:
        tables = item.xpath(f"*[local-name()='{tables_tag}']")
        if not tables:
            continue
        for table in tables[0].xpath("*[local-name()='priceTable']") or [tables[0]]:
            scale_el = table.xpath("*[local-name()='minimumOrderQuantityScale']")
            scale = scale_el[0].text.strip() if scale_el else ""
            if scale == "1":
                price_el = table.xpath("*[local-name()='invoicePrice']")
                if price_el and price_el[0].text:
                    return price_el[0].text.strip()
        # Fallback: eerste priceTable, eerste invoicePrice
        all_tables = tables[0].xpath("*[local-name()='priceTable']") or [tables[0]]
        if all_tables:
            price_el = all_tables[0].xpath("*[local-name()='invoicePrice']")
            if price_el and price_el[0].text:
                return price_el[0].text.strip()
    return ""

def get_allergens(item):
    """
    Geeft twee lijsten terug:
      mapped   = interne SYR waarde-ID's (tab-separated string voor FM)
      unmapped = onbekende codes (voor logging)
    Enkel CONTAINS en MAY_CONTAIN.
    """
    mapped   = set()
    unmapped = set()
    allergens_el = lx(item, "allergens")
    if allergens_el is None:
        return "", ""
    for allergen in allergens_el.xpath("*[local-name()='allergen']"):
        level_el = allergen.xpath("*[local-name()='levelOfContainment']")
        if not level_el:
            continue
        level = (level_el[0].text or "").upper()
        if level not in ("CONTAINS", "MAY_CONTAIN"):
            continue
        code_els = allergen.xpath("*[local-name()='allergenTypeName']")
        if not code_els:
            continue
        code = code_els[0].get("allergenCode", "").upper()
        if not code:
            continue
        if code in ALLERGEN_MAP:
            mapped.add(ALLERGEN_MAP[code])
        else:
            unmapped.add(code)
    mapped_str   = "\n".join(sorted(mapped))
    unmapped_str = "\n".join(sorted(unmapped))
    return mapped_str, unmapped_str

def get_portion_options(item):
    """Geeft portionSizes en portionPackagings als newline-lists."""
    packaging_type = lxt(item, "sfgPackagingType").lower()
    if packaging_type != "port":
        return "", ""

    sizes = []
    sizes_el = item.xpath("*[local-name()='portionSizes']/*[local-name()='portionSize']")
    for s in sizes_el:
        sizes.append(s.text.strip() if s.text else "")

    packagings = []
    pack_el = item.xpath("*[local-name()='portionPackagings']/*[local-name()='portionPackaging']")
    for p in pack_el:
        packagings.append(p.text.strip() if p.text else "")

    return "\n".join(sizes), "\n".join(packagings)

def get_ean(item):
    """EAN via childs/childItem/childItemIdentification."""
    results = item.xpath(
        "*[local-name()='childs']"
        "/*[local-name()='childItem']"
        "/*[local-name()='childItemIdentification']"
    )
    return (results[0].text or "").strip() if results else ""

# ---------------------------------------------------------------------------
# MAIN CONVERTER
# ---------------------------------------------------------------------------
def convert():
    t0 = time.time()
    log.info("=== Sligro XML → JSON conversie gestart ===")

    if not os.path.exists(XML_PATH):
        log.error(f"XML niet gevonden: {XML_PATH}")
        sys.exit(1)

    log.info(f"XML inladen: {XML_PATH}")
    try:
        tree = etree.parse(XML_PATH)
    except etree.XMLSyntaxError as e:
        log.error(f"XML parse fout: {e}")
        sys.exit(1)

    root = tree.getroot()
    items = root.xpath("//*[local-name()='tradeItem']")
    total = len(items)
    log.info(f"Aantal artikelen gevonden: {total}")

    articles = []
    unmapped_all = []

    for i, item in enumerate(items, 1):
        # --- Basisgegevens ---
        code         = lxt(item, "sfgItemNumber")
        ean          = get_ean(item)
        nom_nl       = lxdesc(item, "productTitleOnline", "nl_BE")
        nom_fr       = lxdesc(item, "productTitleOnline", "fr_BE")
        fabrikant    = lxt(item, "sfgNameOfSupplier")

        # --- Classificatie ---
        groep_nl     = lxdesc(item, "sfgCatalogGroupDescription",    "nl_BE")
        groep_fr     = lxdesc(item, "sfgCatalogGroupDescription",    "fr_BE")
        subgroep_nl  = lxdesc(item, "sfgCatalogSubGroupDescription", "nl_BE")
        subgroep_fr  = lxdesc(item, "sfgCatalogSubGroupDescription", "fr_BE")


        # --- SYR categorie mapping ---
        syr_groep, syr_subgroep = CATEGORY_MAP.get(groep_nl, ("", ""))
        # --- Verpakking ---
        unit_article   = lxa(item,  "sfgNetContent", "unitOfMeasure")
        cont_article   = unit_article
        packaging_type = lxt(item,  "sfgPackagingType")
        unit_portie    = lxa(item,  "sfgPackagingType", "unitOfMeasure")
        emb_achat      = lxa(item,  "sfgCalculatedQuantity", "unitOfMeasure")
        q_emb_txt      = lxt(item,  "sfgNetContent")
        gem_gewicht    = lxt(item,  "sfgTotalNetVolume")
        min_order_txt  = ""
        # minimumOrderQuantityScale ophalen
        scale_els = item.xpath(
            "*[local-name()='priceTables']/*[local-name()='priceTable'][1]"
            "/*[local-name()='minimumOrderQuantityScale']"
        ) or item.xpath(
            "*[local-name()='priceTable'][1]/*[local-name()='minimumOrderQuantityScale']"
        )
        if scale_els:
            min_order_txt = (scale_els[0].text or "").strip()

        te_wegen = 1 if (packaging_type == "STWG" or unit_portie) else 0

        # Aantal consumptie-eenheden per aankoopverpakking (bv. 12 ST per doos)
        q_consumption      = lxt(item, "sfgQuantityOfConsumptionUnits")
        q_consumption_unit = lxa(item, "sfgQuantityOfConsumptionUnits", "unitOfMeasure")

        # Aankoopverpakking type (bv. "DS"=doos, "ST"=stuk) + aantal
        trade_item_qty      = lxt(item, "sfgTradeItemQuantity")
        trade_item_qty_unit = lxa(item, "sfgTradeItemQuantity", "unitOfMeasure")

        # Bestelbare EAN (kan afwijken van consumer EAN)
        ean_orderable = lxt(item, "sfgOrderableTradeItemIdentification")

        # Merk
        brand_name = lxt(item, "sfgBrandName")

        # Leesbare verpakkingsbeschrijving (bv. "Doos 12 x 500 gram")
        content_desc_nl = lxdesc(item, "contentDescriptionOnline", "nl_BE")
        content_desc_fr = lxdesc(item, "contentDescriptionOnline", "fr_BE")

        # Interne Sligro-omschrijving (kortere handelsnaam)
        sfg_desc_nl = lxdesc(item, "sfgTradeItemDescription", "nl_BE")
        sfg_desc_fr = lxdesc(item, "sfgTradeItemDescription", "fr_BE")

        size_options, pack_options = get_portion_options(item)

        # --- Extra info tekst ---
        add_desc   = lxt(item, "additionalTradeItemDescription")
        net_cont   = lxt(item, "sfgNetContentPerTradeItemUnit")
        vol_cont   = lxt(item, "sfgVolumePerConsumptionUnit")
        allerg_stmt= lxt(item, "allergenStatement")
        info_txt   = "\n".join(filter(None, [add_desc, net_cont, vol_cont, allerg_stmt]))

        # --- Prijs ---
        prijs = get_price(item)

        # --- BTW ---
        btw_code = lxt(item, "sfgTaxApplicabilityCode")
        btw      = BTW_MAP.get(btw_code, 0)

        # --- Allergenen ---
        allergens_mapped, allergens_unmapped = get_allergens(item)
        if allergens_unmapped:
            unmapped_all.append(f"{code} | {nom_nl} → {allergens_unmapped}")

        # --- Afbeeldingen (enkel URL opslaan, download later) ---
        img_large  = lxt(item, "sfgAssetLargeURL")
        img_medium = lxt(item, "sfgAssetMediumURL")
        img_small  = lxt(item, "sfgAssetSmallURL")

        # --- Artikel object ---
        article = {
            "code":          code,
            "ean":           ean,
            "nom_nl":        nom_nl,
            "nom_fr":        nom_fr,
            "fabrikant":     fabrikant,
            "groep_nl":      groep_nl,
            "syr_groep":     syr_groep,
            "syr_subgroep":  syr_subgroep,
            "groep_fr":      groep_fr,
            "subgroep_nl":   subgroep_nl,
            "subgroep_fr":   subgroep_fr,
            "unit_article":      unit_article,
            "cont_article":      cont_article,
            "unit_portie":       unit_portie,
            "emb_achat":         emb_achat,
            "q_emb":             q_emb_txt,
            "cont_emb":          cont_article if not unit_portie else "2.28",
            "q_consumption":     q_consumption,
            "q_consumption_unit":q_consumption_unit,
            "trade_item_qty":    trade_item_qty,
            "trade_item_qty_unit":trade_item_qty_unit,
            "gem_gewicht":       gem_gewicht,
            "min_order":         min_order_txt,
            "te_wegen":          te_wegen,
            "ean_orderable":     ean_orderable,
            "brand_name":        brand_name,
            "content_desc_nl":   content_desc_nl,
            "content_desc_fr":   content_desc_fr,
            "sfg_desc_nl":       sfg_desc_nl,
            "sfg_desc_fr":       sfg_desc_fr,
            "size_options":      size_options,
            "pack_options":      pack_options,
            "info_txt":      info_txt,
            "prijs":         prijs,
            "btw":           btw,
            "allergens":     allergens_mapped,
            "img_large":     img_large,
            "img_medium":    img_medium,
            "img_small":     img_small,
        }
        articles.append(article)

        if i % 100 == 0:
            log.info(f"  Verwerkt: {i}/{total}")

    # --- Output JSON schrijven ---
    output = {
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "total":        total,
        "articles":     articles,
    }

    json_tmp = JSON_PATH + ".tmp"
    with open(json_tmp, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, separators=(",", ":"))

    os.replace(json_tmp, JSON_PATH)  # atomische swap — nooit een half bestand
    # gzip aanmaken voor snellere download door FM
    import gzip
    import shutil
    gz_path = JSON_PATH + '.gz'
    gz_tmp = gz_path + '.tmp'
    with open(JSON_PATH, 'rb') as f_in:
        with gzip.open(gz_tmp, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    os.replace(gz_tmp, gz_path)
    elapsed = time.time() - t0
    log.info(f"JSON geschreven: {JSON_PATH}")
    log.info(f"Artikelen: {total} | Unmapped allergenen: {len(unmapped_all)}")
    log.info(f"Klaar in {elapsed:.1f}s")

    if unmapped_all:
        log.warning("Unmapped allergeen-codes:")
        for line in unmapped_all:
            log.warning(f"  {line}")

    print(f"OK: {total} artikelen → {JSON_PATH} ({elapsed:.1f}s)")

if __name__ == "__main__":
    convert()

