"""
Final extraction v6: Using real naviKey values from userrole.htm findAllLayers.

The 'find' action on dataTransmissionAPI requires "keyColumn" or "filter".
We now have real navi_key values to try with findGeom, findAll, search, etc.
"""
import sys
import os
import json
import time
import re
import urllib3
import requests
from pathlib import Path
from datetime import datetime

# Emit UTF-8 on stdout regardless of the console default; main() prints
# OUTPUT_DIR, which contains non-ASCII characters.
sys.stdout.reconfigure(encoding='utf-8')
# SESSION.verify is switched off below, so silence the per-request
# InsecureRequestWarning that urllib3 would otherwise emit.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Base URLs; post_api()/get_api() build every request under PUBLIC_BASE.
GISWEB_BASE = "https://ymspace.ga.nycu.edu.tw/gisweb"
PUBLIC_BASE = f"{GISWEB_BASE}/public"
# Directory that save() writes into.  It is created at import time.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
OUTPUT_DIR = Path(r"C:\Users\thc1006\Desktop\NQSD\新增資料夾\data\ymmap_archive\route_data\v6_final")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# One shared HTTP session so every request carries the same headers.
SESSION = requests.Session()
# NOTE(review): TLS certificate verification is disabled for all requests.
SESSION.verify = False
SESSION.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Accept": "application/json, */*",
    "Referer": f"{PUBLIC_BASE}/map.htm",
    "X-Requested-With": "XMLHttpRequest",
})

# Seconds passed to time.sleep() after each request in main().
DELAY = 0.3
# Incremented once per post_api()/get_api() call.
total_requests = 0
# One {"label", "length"} entry appended by log() for every hit.
all_hits = []
# File names appended by main() for the hits saved in phases 1-4.
real_data_files = []


def post_api(endpoint, action, data, label=""):
    """POST form fields to a public endpoint and classify the response.

    Args:
        endpoint: Page name appended to PUBLIC_BASE (e.g. "route.htm").
        action: Value of the ``action`` query-string parameter.
        data: Form fields sent as the request body.
        label: Free-text tag copied into the result for logging.

    Returns:
        dict: For a completed request: ``status``, ``content_type``,
        ``length`` (body size in bytes), ``label``, ``data`` (parsed JSON,
        or the stripped body text when the body is not JSON), ``is_json``,
        ``is_error`` and ``is_hit``.  If an Exception is raised along the
        way, a reduced dict with ``error``, ``is_hit`` (False),
        ``is_error`` (True) and ``label`` is returned instead.
    """
    global total_requests
    total_requests += 1
    url = f"{PUBLIC_BASE}/{endpoint}?action={action}"
    # Substrings of the server's message/msg fields that mark a failed call.
    # NOTE(review): get_api() additionally treats "empty" as a marker;
    # confirm whether that difference is intentional.
    error_markers = ("Exception", "null", "Error", "not present", "not met",
                     "Incorrect", "cannot", "Cannot")
    try:
        resp = SESSION.post(url, data=data, timeout=15)
        result = {
            "status": resp.status_code,
            "content_type": resp.headers.get("Content-Type", ""),
            "length": len(resp.content),
            "label": label,
        }
        try:
            result["data"] = resp.json()
            result["is_json"] = True
        except ValueError:
            # Response.json() reports an unparsable body with a ValueError
            # subclass; fall back to the raw text.  Catching only that keeps
            # KeyboardInterrupt/SystemExit from being swallowed here.
            result["data"] = resp.text.strip()
            result["is_json"] = False

        # An HTTP error status is a failure whatever the body looks like;
        # previously a 4xx/5xx reply with a JSON object body could pass as a hit.
        is_error = result["status"] >= 400
        payload = result["data"]
        if result["is_json"] and isinstance(payload, dict):
            msg = str(payload.get("message", "")) + str(payload.get("msg", ""))
            if any(marker in msg for marker in error_markers):
                is_error = True
            if payload.get("success") is False:
                is_error = True
        result["is_error"] = is_error
        # Bodies of 10 bytes or fewer are treated as empty answers.
        result["is_hit"] = not is_error and result["length"] > 10
        return result
    except Exception as e:
        # Best-effort boundary: a failed request must not abort the whole run.
        return {"error": str(e), "is_hit": False, "is_error": True, "label": label}


def get_api(endpoint, action, params=None, label=""):
    """GET a public endpoint with query parameters and classify the response.

    Args:
        endpoint: Page name appended to PUBLIC_BASE (e.g. "route.htm").
        action: Value of the ``action`` query parameter.
        params: Optional dict of extra query parameters; merged over
            ``{"action": action}``.
        label: Free-text tag copied into the result for logging.

    Returns:
        dict: For a completed request: ``status``, ``content_type``,
        ``length`` (body size in bytes), ``label``, ``data`` (parsed JSON,
        or the stripped body text when the body is not JSON), ``is_json``,
        ``is_error`` and ``is_hit``.  If an Exception is raised along the
        way, a reduced dict with ``error``, ``is_hit`` (False),
        ``is_error`` (True) and ``label`` is returned instead.
    """
    global total_requests
    total_requests += 1
    url = f"{PUBLIC_BASE}/{endpoint}"
    full_params = {"action": action}
    if params:
        full_params.update(params)
    # Substrings of the server's message/msg fields that mark a failed call.
    error_markers = ("Exception", "null", "Error", "not present", "not met",
                     "Incorrect", "cannot", "Cannot", "empty")
    try:
        resp = SESSION.get(url, params=full_params, timeout=15)
        result = {
            "status": resp.status_code,
            "content_type": resp.headers.get("Content-Type", ""),
            "length": len(resp.content),
            "label": label,
        }
        try:
            result["data"] = resp.json()
            result["is_json"] = True
        except ValueError:
            # Response.json() reports an unparsable body with a ValueError
            # subclass; fall back to the raw text.  Catching only that keeps
            # KeyboardInterrupt/SystemExit from being swallowed here.
            result["data"] = resp.text.strip()
            result["is_json"] = False

        # An HTTP error status is a failure whatever the body looks like;
        # previously a 4xx/5xx reply with a JSON object body could pass as a hit.
        is_error = result["status"] >= 400
        payload = result["data"]
        if result["is_json"] and isinstance(payload, dict):
            msg = str(payload.get("message", "")) + str(payload.get("msg", ""))
            if any(marker in msg for marker in error_markers):
                is_error = True
            if payload.get("success") is False:
                is_error = True
        result["is_error"] = is_error
        # Bodies of 10 bytes or fewer are treated as empty answers.
        result["is_hit"] = not is_error and result["length"] > 10
        return result
    except Exception as e:
        # Best-effort boundary: a failed request must not abort the whole run.
        return {"error": str(e), "is_hit": False, "is_error": True, "label": label}


def save(filename, data):
    """Write *data* to OUTPUT_DIR/filename and return the target path.

    dict and list values are written as indented JSON with non-ASCII
    characters kept as-is; str values are written verbatim.  Any other
    type writes nothing, but the path is still returned.
    """
    target = OUTPUT_DIR / filename
    as_json = isinstance(data, (dict, list))
    if not as_json and not isinstance(data, str):
        # Unsupported payload type: leave the file system untouched.
        return target

    with open(target, "w", encoding="utf-8") as handle:
        if as_json:
            json.dump(data, handle, ensure_ascii=False, indent=2)
        else:
            handle.write(data)
    return target


def log(result, tag=""):
    """Print a one-line report for *result* and say whether it was a hit.

    A hit prints its label and length (plus up to 200 characters of the
    payload when the length exceeds 50) and is appended to ``all_hits``.
    A miss is printed only when *tag* is non-empty, using the payload's
    ``message`` field or the first 100 characters of a non-dict payload.

    Returns:
        True for a hit, False otherwise.
    """
    label = result.get("label", "")

    if not result.get("is_hit"):
        if tag:
            payload = result.get("data", {})
            if isinstance(payload, dict):
                msg = str(payload.get("message", ""))
            else:
                msg = str(payload)[:100]
            print(f"  [{tag}] {label}: {msg[:120]}")
        return False

    payload = result.get("data")
    if isinstance(payload, (dict, list)):
        text = json.dumps(payload, ensure_ascii=False)
    else:
        text = str(payload)
    size = result["length"]
    print(f"  [HIT] {label} (len={size})")
    if size > 50:
        print(f"         {text[:200]}")
    all_hits.append({"label": label, "length": size})
    return True


def _banner(title):
    """Print a section title framed by two 70-character rules."""
    print("\n" + "=" * 70)
    print(title)
    print("=" * 70)


def _inner_data(result):
    """Return ``result["data"]["data"]``, or ``[]`` if the body is not a JSON object.

    A hit may carry stripped text (or a non-object JSON value) in
    ``result["data"]``; chaining ``.get`` on it would raise AttributeError
    and abort the run.
    """
    payload = result.get("data")
    if isinstance(payload, dict):
        return payload.get("data", [])
    return []


def _inner_list(result):
    """Like _inner_data(), but return ``[]`` unless the inner value is a list."""
    inner = _inner_data(result)
    return inner if isinstance(inner, list) else []


def _record_hit(result, tag, filename):
    """Log *result*; when it is a hit, save its payload and remember the file name."""
    if log(result, tag):
        save(filename, result["data"])
        real_data_files.append(filename)


def _probe_navi_keys(navi_keys):
    """Run phases 1-4: try every navi_key against the route.htm actions."""
    # ============================================================
    # PHASE 1: dataTransmissionAPI find with real naviKeys
    # ============================================================
    _banner("PHASE 1: dataTransmissionAPI 'find' with real naviKeys")
    for nk in navi_keys:
        # find with keyColumn
        q = json.dumps({"target": nk, "action": "find", "keyColumn": "gid"})
        result = get_api("route.htm", "dataTransmissionAPI", {"query": q},
                         f"find target={nk} keyColumn=gid")
        time.sleep(DELAY)
        _record_hit(result, "info", f"dataTrans_find_{nk}.json")

        # find with filter
        q = json.dumps({"target": nk, "action": "find", "filter": "1=1"})
        result = get_api("route.htm", "dataTransmissionAPI", {"query": q},
                         f"find target={nk} filter=1=1")
        time.sleep(DELAY)
        _record_hit(result, "", f"dataTrans_find_filter_{nk}.json")

    # ============================================================
    # PHASE 2: findGeom with real navi_keys as path
    # ============================================================
    _banner("PHASE 2: findGeom POST with real navi_keys as path")
    for nk in navi_keys:
        for gid in ["1", "2"]:
            result = post_api("route.htm", "findGeom",
                              {"path": nk, "gid": gid, "proj": "EPSG:4326"},
                              f"findGeom path={nk} gid={gid}")
            time.sleep(DELAY)
            # Only the first gid reports misses, to keep the output short.
            _record_hit(result, "info" if gid == "1" else "",
                        f"findGeom_{nk}_gid{gid}.json")

    # ============================================================
    # PHASE 3: findAll with real navi_keys as path
    # ============================================================
    _banner("PHASE 3: findAll POST with real navi_keys as path/tableName")
    for nk in navi_keys:
        result = post_api("route.htm", "findAll",
                          {"path": nk},
                          f"findAll path={nk}")
        time.sleep(DELAY)
        _record_hit(result, "info", f"findAll_{nk}.json")

    # ============================================================
    # PHASE 4: getHeaders with real navi_keys
    # ============================================================
    _banner("PHASE 4: getHeaders with real navi_keys")
    for nk in navi_keys:
        result = post_api("route.htm", "getHeaders",
                          {"path": nk},
                          f"getHeaders path={nk}")
        time.sleep(DELAY)
        _record_hit(result, "info", f"getHeaders_{nk}.json")


def _collect_centroids():
    """Query the centroid of every candidate building ID; return {bid: first row}."""
    all_buildings = {}
    # Get centroids for ALL buildings (Y001-Y019, B001-B020, P001-P009)
    for prefix, max_id in [("Y", 19), ("B", 20), ("P", 9)]:
        for num in range(1, max_id + 1):
            bid = f"{prefix}{num:03d}"
            result = post_api("buildinfo.htm", "getCentroidByBuildId",
                              {"buildId": bid, "proj": "EPSG:4326"},
                              f"centroid {bid}")
            time.sleep(DELAY)
            d = _inner_list(result) if result.get("is_hit") else []
            if d:
                all_buildings[bid] = d[0]
                name = d[0].get("name", "?")
                lat = d[0].get("lat", "?")
                lon = d[0].get("lon", "?")
                print(f"  {bid}: {name} ({lat}, {lon})")
    return all_buildings


def _collect_public_data(building_ids):
    """Fetch loadPublicData for each building; return {bid: first row}."""
    all_public_data = {}
    for bid in building_ids:
        result = post_api("buildinfo.htm", "loadPublicData",
                          {"buildId": bid, "locale": "zh-TW"},
                          f"publicData {bid}")
        time.sleep(DELAY)
        if result.get("is_hit"):
            d = _inner_list(result)
            if d:
                all_public_data[bid] = d[0]
    return all_public_data


def _collect_floors(building_ids):
    """Fetch getFloorList for each building; return {bid: [floor, ...]}."""
    all_floors = {}
    for bid in building_ids:
        result = post_api("buildinfo.htm", "getFloorList",
                          {"buildId": bid},
                          f"floors {bid}")
        time.sleep(DELAY)
        if result.get("is_hit"):
            d = _inner_list(result)
            if d:
                # Skip rows without a "floor" field instead of crashing on them.
                all_floors[bid] = [row["floor"] for row in d
                                   if isinstance(row, dict) and "floor" in row]
    return all_floors


def _collect_rooms(all_floors):
    """Fetch findByFloor for each (building, floor); return {"bid_floor": rows}."""
    all_rooms = {}
    for bid, floors in all_floors.items():
        if not floors:
            continue
        for floor in floors:
            result = post_api("roominfo.htm", "findByFloor",
                              {"buildId": bid, "floor": floor, "locale": "zh-TW"},
                              f"rooms {bid}/{floor}")
            time.sleep(DELAY)
            if result.get("is_hit"):
                d = _inner_data(result)
                if d:
                    all_rooms[f"{bid}_{floor}"] = d
                    print(f"    {bid}/{floor}: {len(d)} rooms")
    return all_rooms


def _collect_bounds(building_ids):
    """Fetch getBoundingBoxByBuildId for each building; return {bid: bound}."""
    all_bounds = {}
    for bid in building_ids:
        result = get_api("buildinfo.htm", "getBoundingBoxByBuildId",
                         {"buildId": bid, "proj": "EPSG:4326"},
                         f"bbox {bid}")
        time.sleep(DELAY)
        if result.get("is_hit"):
            d = _inner_list(result)
            if d:
                all_bounds[bid] = d[0].get("bound", "")
    return all_bounds


def _collect_images(building_ids):
    """Fetch loadImage for each building; return {bid: rows}."""
    all_images = {}
    for bid in building_ids:
        result = post_api("buildinfo.htm", "loadImage",
                          {"buildId": bid, "naviKey": "buildinfo"},
                          f"images {bid}")
        time.sleep(DELAY)
        if result.get("is_hit"):
            d = _inner_data(result)
            if d:
                all_images[bid] = d
    return all_images


def main():
    """Run the full v6 extraction and write every result under OUTPUT_DIR.

    Steps: load the v5 layer list, probe each navi_key against the
    route.htm actions (phases 1-4), collect per-building centroids, public
    data, floors, rooms, bounds and images (phase 5), then print and save a
    final summary.  Takes no arguments and returns None.

    Raises:
        OSError: if the v5 layer file cannot be opened.
        json.JSONDecodeError: if the v5 layer file is not valid JSON.
    """
    print(f"Final Route.htm Extraction v6 - {datetime.now().isoformat()}")
    print(f"Output: {OUTPUT_DIR}")

    # Load the layer data from v5
    layers_file = Path(r"C:\Users\thc1006\Desktop\NQSD\新增資料夾\data\ymmap_archive\route_data\v5\userrole_findAllLayers.json")
    with open(layers_file, "r", encoding="utf-8") as f:
        layers_data = json.load(f)

    rows = layers_data.get("rows", [])
    print(f"\nLoaded {len(rows)} layers from userrole findAllLayers")

    # Extract all unique navi_keys, remembering the name of the first row
    # that carries each key (single pass; tolerates rows missing either field).
    navi_names = {}
    for r in rows:
        nk = r.get("navi_key")
        if nk and nk not in navi_names:
            navi_names[nk] = r.get("navi_name", "")
    navi_keys = sorted(navi_names)
    print(f"Unique navi_keys: {len(navi_keys)}")
    for nk in navi_keys:
        print(f"  {nk} -> {navi_names[nk]}")

    # Save layer summary
    layer_summary = [
        {
            "id": r.get("id"),
            "navi_key": r.get("navi_key"),
            "navi_name": r.get("navi_name"),
            "navi_name_en": r.get("navi_name_en"),
            "navi_type": r.get("navi_type"),
            "group_name": r.get("group_name"),
            "group_name_en": r.get("group_name_en"),
        }
        for r in rows
    ]
    save("_layer_summary.json", layer_summary)

    _probe_navi_keys(navi_keys)

    # ============================================================
    # PHASE 5: Expand buildinfo to all buildings
    # ============================================================
    _banner("PHASE 5: Complete buildinfo extraction (all building IDs)")

    all_buildings = _collect_centroids()
    save("all_buildings_centroids.json", all_buildings)
    print(f"\n  Found {len(all_buildings)} buildings with data")

    # Get public data for all found buildings
    save("all_buildings_publicdata.json", _collect_public_data(all_buildings))

    # Get floor lists for all buildings
    all_floors = _collect_floors(all_buildings)
    save("all_buildings_floors.json", all_floors)
    buildings_with_floors = sum(1 for floors in all_floors.values() if floors)
    print(f"\n  Buildings with floor data: {buildings_with_floors}")

    # Get room data for all buildings with floors
    print("\n  --- Room data ---")
    all_rooms = _collect_rooms(all_floors)
    save("all_rooms_by_floor.json", all_rooms)
    print(f"\n  Total floor-room combos with data: {len(all_rooms)}")

    # Get building bounds
    all_bounds = _collect_bounds(all_buildings)
    save("all_buildings_bounds.json", all_bounds)

    # Get building images
    all_images = _collect_images(all_buildings)
    save("all_buildings_images.json", all_images)
    print(f"\n  Buildings with images: {len(all_images)}")

    # ============================================================
    # FINAL SUMMARY
    # ============================================================
    _banner("FINAL SUMMARY")
    print(f"Total requests: {total_requests}")
    print(f"Total hits: {len(all_hits)}")
    print("\nData extracted:")
    print("  Campuses: 3")
    print(f"  Buildings with coordinates: {len(all_buildings)}")
    print(f"  Buildings with floor data: {buildings_with_floors}")
    print(f"  Floor-room combos: {len(all_rooms)}")
    print(f"  Building bounds: {len(all_bounds)}")
    print(f"  Building images: {len(all_images)}")
    print(f"  Layer definitions: {len(rows)}")
    print(f"  Unique naviKeys: {len(navi_keys)}")

    # Room keys are "<bid>_<floor>"; build the set of building IDs once
    # instead of once per building.
    buildings_with_rooms = {key.split("_")[0] for key in all_rooms}

    final_summary = {
        "date": datetime.now().isoformat(),
        "total_requests": total_requests,
        "total_hits": len(all_hits),
        "data_extracted": {
            "campuses": 3,
            "buildings_with_coords": len(all_buildings),
            "buildings_with_floors": buildings_with_floors,
            "floor_room_combos": len(all_rooms),
            "building_bounds": len(all_bounds),
            "building_images": len(all_images),
            "layer_definitions": len(rows),
            "unique_navi_keys": len(navi_keys),
        },
        "navi_keys": sorted(navi_keys),
        "buildings": {bid: {
            "name": all_buildings[bid].get("name", ""),
            "name_en": all_buildings[bid].get("name_en", ""),
            "lat": all_buildings[bid].get("lat", ""),
            "lon": all_buildings[bid].get("lon", ""),
            "floors": all_floors.get(bid, []),
            "has_rooms": bid in buildings_with_rooms,
            "has_images": bid in all_images,
            "has_bounds": bid in all_bounds,
        } for bid in sorted(all_buildings)},
    }
    save("_FINAL_SUMMARY_v6.json", final_summary)


# Call main() only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
