commit 3397dd832b3ae8cbad1483d7138adf48379e77ca
Author: chandhiran
Date:   Thu Oct 19 11:36:27 2023 +0000

    Upload files to "/"

diff --git a/API.py b/API.py
new file mode 100644
index 0000000..090e653
--- /dev/null
+++ b/API.py
@@ -0,0 +1,97 @@
+from io import BytesIO
+
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import pytesseract
+from PIL import Image
+import PyPDF2
+import requests
+from bs4 import BeautifulSoup
+
+app = Flask(__name__)
+CORS(app)
+
+# Path to the Tesseract binary; modify as needed for your system.
+pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
+
+@app.route('/ocr', methods=['POST'])
+def ocr_from_uploaded_image():
+    try:
+        # The request body is expected to be JSON: {"image_url": "..."}
+        image_data = request.get_json()
+        image_url = image_data.get('image_url')
+        if image_url is None:
+            return jsonify({'error': 'image_url parameter is missing.'}), 400
+
+        download = requests.get(image_url, timeout=10)
+        download.raise_for_status()
+        image = Image.open(BytesIO(download.content))
+
+        # Perform OCR on the downloaded image.
+        text = pytesseract.image_to_string(image)
+
+        # CORS(app) already allows all origins; the explicit header is redundant but harmless.
+        response = jsonify({'extracted_text': text})
+        response.headers.add('Access-Control-Allow-Origin', '*')
+        return response
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/AI', methods=['POST'])
+def chat_AI():
+    # Placeholder endpoint: echoes the posted JSON back to the caller.
+    data = request.get_json()
+    return jsonify(data), 201
+
+@app.route('/pdf', methods=['POST'])
+def extract_text():
+    try:
+        data = request.get_json()
+        # The client sends the PDF link under the 'image_url' key.
+        pdf_url = data.get('image_url')
+        if pdf_url is None:
+            return jsonify({'error': 'image_url parameter is missing.'}), 400
+
+        response = requests.get(pdf_url, timeout=10)
+        if response.status_code == 200:
+            pdf_file = BytesIO(response.content)
+            pdf_reader = PyPDF2.PdfReader(pdf_file)
+            # Concatenate the text of every page; extract_text() can return None.
+            extracted_text = ''.join(page.extract_text() or '' for page in pdf_reader.pages)
+            return jsonify({'extracted_text': extracted_text})
+        else:
+            return jsonify({'error': 'Failed to download PDF from the provided URL'}), 502
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/search_news', methods=['POST'])
+def search_news():
+    # Get the URL and the term to search for from the request's JSON body.
+    data = request.get_json()
+    url = data.get('url')
+    search_term = data.get('data')
+
+    if url is None:
+        return jsonify({'error': 'URL parameter is missing.'}), 400
+    if search_term is None:
+        return jsonify({'error': 'Data parameter is missing.'}), 400
+
+    try:
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()  # Raise on HTTP errors.
+
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Collect the text of every link that contains the search term.
+        links = soup.find_all('a')
+        news_paragraphs = [link.get_text() for link in links if search_term in link.get_text()]
+
+        if news_paragraphs:
+            return jsonify({'news_paragraphs': news_paragraphs})
+        else:
+            return jsonify({'message': 'No news paragraphs found.'})
+    except requests.exceptions.RequestException as e:
+        return jsonify({'error': f'Failed to retrieve the web page: {str(e)}'}), 502
+
+if __name__ == '__main__':
+    app.run(debug=True)
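
A quick way to smoke-test the endpoints added in this commit is a small client script like the sketch below. It is not part of the commit: it assumes the server is running locally at Flask's default development address (http://127.0.0.1:5000), and the https://example.com/... URLs are placeholders to be replaced with real, reachable resources. The endpoint paths and JSON keys ('image_url', 'url', 'data') are taken from API.py above.

    import requests

    BASE = 'http://127.0.0.1:5000'  # Flask's default dev-server address (assumed)

    # OCR an image reachable by URL; /ocr expects {"image_url": ...}.
    resp = requests.post(f'{BASE}/ocr',
                         json={'image_url': 'https://example.com/sample.png'},
                         timeout=30)
    print(resp.json())  # {'extracted_text': '...'} on success

    # Extract text from a PDF; note that /pdf also reads the 'image_url' key.
    resp = requests.post(f'{BASE}/pdf',
                         json={'image_url': 'https://example.com/sample.pdf'},
                         timeout=30)
    print(resp.json())

    # Search a page's links for a term; /search_news expects 'url' and 'data'.
    resp = requests.post(f'{BASE}/search_news',
                         json={'url': 'https://example.com/news', 'data': 'election'},
                         timeout=30)
    print(resp.json())

Because CORS(app) is enabled for all origins, the same calls also work from browser JavaScript, which is why the explicit Access-Control-Allow-Origin header in /ocr is redundant.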