#!/usr/bin/env python3
"""
Simple Ollama Qwen3 Client

A standalone script to query Ollama's Qwen3 model.
"""

import argparse
import sys

import ollama


def query_qwen3(prompt: str, model: str = "qwen3:4b",
                temperature: float = 0.7, stream: bool = False) -> str:
    """
    Send a prompt to Qwen3 and get the response.

    Args:
        prompt: The text prompt to send.
        model: Model name (default: qwen3:4b).
        temperature: Sampling temperature (0.0-1.0, default: 0.7).
        stream: Whether to print the response to stdout as it arrives
            (default: False).

    Returns:
        The model's full response string, or an "❌ Error: ..." string
        if the request failed.
    """
    try:
        if stream:
            # Streaming response: echo chunks as they arrive.
            print("🤖 Qwen3 (streaming):\n", end="", flush=True)
            # Collect chunks in a list and join once — avoids the
            # quadratic behavior of repeated string concatenation.
            chunks = []
            for chunk in ollama.generate(
                model=model,
                prompt=prompt,
                stream=True,
                options={'temperature': temperature},
            ):
                content = chunk.get('response', '')
                print(content, end="", flush=True)
                chunks.append(content)
            print()  # Final newline after the streamed output
            return "".join(chunks)

        # Non-streaming response: single blocking call.
        response = ollama.generate(
            model=model,
            prompt=prompt,
            options={'temperature': temperature},
        )
        return response['response']
    except Exception as e:
        error = f"❌ Error: {e}"
        # BUG FIX: in streaming mode the caller discards the return value,
        # so the error must be printed here or it is silently lost.
        if stream:
            print(f"\n{error}", file=sys.stderr)
        return error


def interactive_mode(model: str = "qwen3:4b", temperature: float = 0.7) -> None:
    """
    Run an interactive chat loop until the user exits.

    Args:
        model: Model name (default: qwen3:4b).
        temperature: Sampling temperature (0.0-1.0, default: 0.7).
    """
    print(f"🤖 Qwen3 Chat Mode ({model})")
    print("Type 'exit', 'quit', or press Ctrl+C to exit\n")

    while True:
        try:
            prompt = input("You: ").strip()
            if prompt.lower() in ['exit', 'quit', 'q']:
                print("Goodbye!")
                break
            if not prompt:
                continue
            # Reuse query_qwen3 so errors are handled consistently
            # (the original inlined the call and would crash with a
            # raw traceback on any ollama failure).
            response = query_qwen3(prompt, model, temperature)
            print(f"\nQwen3: {response}\n")
        # BUG FIX: also catch EOFError — Ctrl+D on input() previously
        # crashed the loop instead of exiting cleanly.
        except (KeyboardInterrupt, EOFError):
            print("\nGoodbye!")
            break


def main() -> None:
    """Parse CLI arguments and dispatch to the requested mode."""
    parser = argparse.ArgumentParser(
        description="Query Ollama's Qwen3 model",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # BUG FIX: the original example used -p with a prompt string,
        # but -p/--prompt-file expects a file path.
        epilog="""
Examples:
  python ollama_qwen3.py "What is the capital of France?"
  python ollama_qwen3.py --prompt-file prompt.txt --temp 0.3
  python ollama_qwen3.py --interactive
  echo "Hello world" | python ollama_qwen3.py --stdin
""",
    )
    parser.add_argument('prompt', nargs='?',
                        help='The prompt text (optional if using --stdin)')
    parser.add_argument('-p', '--prompt-file',
                        help='Read prompt from file')
    parser.add_argument('--model', default='qwen3:4b',
                        help='Model name (default: qwen3:4b)')
    parser.add_argument('--temp', type=float, default=0.7,
                        help='Temperature 0.0-1.0 (default: 0.7)')
    parser.add_argument('--stdin', action='store_true',
                        help='Read prompt from stdin')
    parser.add_argument('--interactive', '-i', action='store_true',
                        help='Interactive chat mode')
    parser.add_argument('--stream', action='store_true',
                        help='Stream response')

    args = parser.parse_args()

    # Interactive mode short-circuits the single-shot flow entirely.
    if args.interactive:
        interactive_mode(args.model, args.temp)
        return

    # Resolve the prompt from (in priority order) stdin, file, positional.
    prompt = ""
    if args.stdin:
        prompt = sys.stdin.read().strip()
    elif args.prompt_file:
        # Explicit encoding so behavior does not depend on the locale.
        with open(args.prompt_file, 'r', encoding='utf-8') as f:
            prompt = f.read().strip()
    elif args.prompt:
        prompt = args.prompt

    if not prompt:
        print("❌ No prompt provided. Use --help for usage information.")
        sys.exit(1)

    # Query the model. In streaming mode query_qwen3 prints output
    # (and any error) itself; otherwise print the returned string.
    if args.stream:
        query_qwen3(prompt, args.model, args.temp, stream=True)
    else:
        response = query_qwen3(prompt, args.model, args.temp)
        print(response)


if __name__ == "__main__":
    main()