"""Small Flask JSON API: OCR an image URL, extract text from a PDF URL,
and search a web page's links for a term. CORS is enabled for all origins."""

import io

import pytesseract
import PyPDF2
import requests
from bs4 import BeautifulSoup
from flask import Flask, jsonify, request
from flask_cors import CORS
from PIL import Image

app = Flask(__name__)
CORS(app)  # adds Access-Control-Allow-Origin headers to every response

# Windows install location — modify the path as needed for other platforms.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Timeout (seconds) for every outbound HTTP request so a slow or dead remote
# host cannot hang a worker indefinitely.
REQUEST_TIMEOUT = 15


@app.route('/ocr', methods=['POST'])
def ocr_from_uploaded_image():
    """Download the image at JSON key 'image_url' and return its OCR'd text.

    Request body:  {"image_url": "<url>"}
    Response:      {"extracted_text": "..."} on success,
                   {"error": "..."} with 400/500 on failure.
    """
    try:
        payload = request.get_json()
        image_url = payload.get('image_url')
        if not image_url:
            return jsonify({'error': 'image_url parameter is missing.'}), 400

        # NOTE(review): fetching a caller-supplied URL is an SSRF vector;
        # restrict allowed schemes/hosts before exposing this publicly.
        download = requests.get(image_url, timeout=REQUEST_TIMEOUT)
        # Fail fast on 4xx/5xx instead of handing an HTML error page to PIL.
        download.raise_for_status()

        image = Image.open(io.BytesIO(download.content))
        text = pytesseract.image_to_string(image)

        # CORS(app) already sets Access-Control-Allow-Origin; no manual
        # header juggling (the original also shadowed the download response
        # with the jsonify response here).
        return jsonify({'extracted_text': text})
    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/AI', methods=['POST'])
def chat_AI():
    """Echo the posted JSON back with 201 — placeholder for a future AI endpoint."""
    data = request.get_json()
    return jsonify(data), 201


@app.route('/pdf', methods=['POST'])
def extract_text():
    """Download the PDF at JSON key 'image_url' and return all page text.

    The key is kept as 'image_url' for backward compatibility with existing
    clients, even though it carries a PDF URL.
    """
    try:
        data = request.get_json()
        pdf_url = data.get('image_url')
        if not pdf_url:
            return jsonify({'error': 'image_url parameter is missing.'}), 400

        download = requests.get(pdf_url, timeout=REQUEST_TIMEOUT)
        if download.status_code != 200:
            return jsonify({'error': 'Failed to download PDF from the provided URL'}), 502

        reader = PyPDF2.PdfReader(io.BytesIO(download.content))
        # extract_text() returns None for image-only pages; `or ''` prevents
        # the TypeError the original += loop would raise in that case.
        extracted_text = ''.join(page.extract_text() or '' for page in reader.pages)
        return jsonify({'extracted_text': extracted_text})
    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/search_news', methods=['POST'])
def search_news():
    """Fetch the page at 'url' and return anchor texts containing 'data'.

    Request body:  {"url": "<page url>", "data": "<search term>"}
    """
    data = request.get_json()
    url = data.get('url')
    search_term = data.get('data')
    if url is None:
        return jsonify({"error": "URL parameter is missing."}), 400
    if search_term is None:
        return jsonify({"error": "Data parameter is missing."}), 400
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()  # surface request errors as RequestException
        soup = BeautifulSoup(response.text, "html.parser")
        # Compute each link's text once, then filter on the search term.
        link_texts = (link.get_text() for link in soup.find_all("a"))
        news_paragraphs = [text for text in link_texts if search_term in text]
        if news_paragraphs:
            return jsonify({"news_paragraphs": news_paragraphs})
        return jsonify({"message": "No news paragraphs found."})
    except requests.exceptions.RequestException as e:
        return jsonify({"error": f"Failed to retrieve the web page: {str(e)}"}), 502


if __name__ == '__main__':
    # debug=True is for local development only — never enable in production.
    app.run(debug=True)