"""CLI: audio.wav → bsData.json  (runs inside lam_a2e conda env)"""
import argparse
import sys
from pathlib import Path

# Put this script's directory at the front of sys.path so the project-local
# `engines` and `models` packages imported inside main() can be resolved.
# NOTE(review): presumably those packages live next to this file — confirm.
sys.path.insert(0, str(Path(__file__).parent))


def main() -> None:
    """Convert an audio file into a blendshape animation JSON file.

    Command-line arguments:
        --audio:  path of the audio file to read (loaded at 16 kHz).
        --output: path the blendshape animation is exported to.

    The audio is fed to the streaming inference engine one second at a
    time; the per-chunk ``expression`` arrays are concatenated and exported
    at 30 fps. On success, ``OK <n> frames`` is printed to stdout.

    Raises:
        ValueError: if the audio file contains no samples.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--audio', required=True)
    parser.add_argument('--output', required=True)
    args = parser.parse_args()

    # Deferred until after argument parsing, so usage errors are reported
    # before any of these imports run.
    import librosa
    import numpy as np
    from engines.defaults import default_argument_parser, default_config_parser, default_setup
    from engines.infer import INFER
    from models.utils import export_blendshape_animation, ARKitBlendShape

    # Parse an empty argv to obtain the project's default config arguments,
    # then point them at the streaming config.
    # NOTE(review): the config path is relative; presumably it is resolved
    # against the current working directory — confirm.
    cfg_args = default_argument_parser().parse_args([])
    cfg_args.config_file = 'configs/lam_audio2exp_config_streaming.py'
    cfg = default_config_parser(cfg_args.config_file, {
        'audio_input': args.audio,
        'save_json_path': args.output,
    })
    cfg = default_setup(cfg)

    infer = INFER.build(dict(type=cfg.infer.type, cfg=cfg))
    infer.model.eval()

    sample_rate = 16000
    chunk_size = sample_rate  # one second of audio per streaming call

    audio, sr = librosa.load(args.audio, sr=sample_rate)
    total_samples = audio.shape[0]
    if total_samples == 0:
        raise ValueError(f'no audio samples in {args.audio!r}')

    context = None
    all_exp = []
    # Step over chunk start offsets rather than a precomputed chunk count:
    # `total // chunk_size + 1` yields a trailing zero-length chunk whenever
    # the audio length is an exact multiple of the chunk size.
    for start in range(0, total_samples, chunk_size):
        chunk = audio[start:start + chunk_size]
        # The returned context is passed back in on the next call.
        out, context = infer.infer_streaming_audio(chunk, sr, context)
        all_exp.append(out['expression'])

    all_exp = np.concatenate(all_exp, axis=0)
    export_blendshape_animation(all_exp, args.output, ARKitBlendShape, fps=30.0)
    print(f'OK {len(all_exp)} frames', flush=True)


# Script entry point: run the conversion only when executed directly.
if __name__ == '__main__':
    main()
