#!/usr/bin/python3
import requests
import json

def query_local_llm(prompt):
	"""Stream a chat completion from the local LLM server and print it.

	Sends ``prompt`` as the user message (after a fixed system message) to
	the chat-completions endpoint at 127.0.0.1:8080, asks for a streamed
	reply, and writes each content fragment to stdout as soon as it arrives.

	Args:
		prompt: Text of the user message.

	Returns:
		None. The reply is printed, not returned.

	Raises:
		requests.RequestException: If the connection fails or the server
			answers with an HTTP error status.
		json.JSONDecodeError: If a ``data:`` event carries a payload that
			is not valid JSON.
	"""
	url = "http://127.0.0.1:8080/v1/chat/completions"
	headers = {"Content-Type": "application/json"}
	data = {
		"messages": [
			{"role": "system", "content": "You are an expert Python developer."},
			{"role": "user", "content": prompt}
		],
		# Must stay True: the loop below only understands the SSE stream,
		# a single non-streamed JSON body would be silently ignored.
		"stream": True
	}

	# Announce before sending so the message is visible even if the
	# request itself fails or blocks.
	print(f"Querying {url}...\n")

	# Bound only the connect phase: a dead server fails fast, while a slow
	# model may still take as long as it needs between tokens.
	# The context manager releases the connection even when we stop reading
	# early (on "[DONE]" or on an exception).
	with requests.post(url, headers=headers, json=data, stream=True,
			timeout=(10, None)) as response:
		# Surface HTTP errors instead of silently printing nothing.
		response.raise_for_status()

		for line in response.iter_lines():
			if not line:
				# Blank lines only separate SSE events.
				continue

			decoded_line = line.decode('utf-8')
			# SSE allows both "data: value" and "data:value"; other fields
			# and ":" comment lines are skipped.
			if not decoded_line.startswith("data:"):
				continue

			content = decoded_line[5:].strip()
			if content == "[DONE]":
				break

			chunk = json.loads(content)
			choices = chunk.get('choices')
			if not choices:
				# Some chunks carry no choices at all; nothing to print.
				continue

			delta = choices[0].get('delta') or {}
			text = delta.get('content')
			# "content" may be absent or null (e.g. a role-only delta).
			if text:
				print(text, end='', flush=True)
	print("\n")

if __name__ == "__main__":
	# Script entry point: run one demo query and stream the answer to stdout.
	demo_prompt = "summarize War and Peace in 100 words "
	query_local_llm(demo_prompt)
