Upload files to "/"

2023-10-19 11:36:27 +00:00 · 2023-10-19 11:36:27 +00:00 · 3397dd832b
commit 3397dd832b
1 changed files with 99 additions and 0 deletions
--- a/API.py
+++ b/API.py
@@ -0,0 +1,99 @@
+import io
+import os
+from io import BytesIO
+
+import PyPDF2
+import pytesseract
+import requests
+from bs4 import BeautifulSoup
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from PIL import Image
+
+app = Flask(__name__)
+CORS(app)  
+
+pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Modify the path as needed
+
+@app.route('/ocr', methods=['POST'])
+def ocr_from_uploaded_image():
+    try:
+        # Assuming the request body is in JSON format
+        image_data = request.get_json()
+        image_url = image_data.get('image_url')
+      
+        response = requests.get(image_url)
+        image = Image.open(BytesIO(response.content))
+
+        # Perform OCR on the image
+        text = pytesseract.image_to_string(image)
+        
+        # Set CORS headers to allow all origins
+        response = jsonify({'extracted_text': text})
+        response.headers.add('Access-Control-Allow-Origin', '*')
+        
+        return response
+    except Exception as e:
+        return jsonify({'error': str(e)})
+
+@app.route('/AI',methods=['post'])
+def chat_AI():
+    
+        data=request.get_json()
+        return jsonify(data),201 
+
+@app.route('/pdf', methods=['POST'])
+def extract_text():
+    try:
+        data = request.get_json()
+        pdf_url = data.get('image_url')
+        response = requests.get(pdf_url)
+        if response.status_code == 200:
+            pdf_file = io.BytesIO(response.content)
+            pdf_reader = PyPDF2.PdfReader(pdf_file)
+            num_pages = len(pdf_reader.pages)
+            extracted_text = ''
+            for page_num in range(num_pages):
+                page = pdf_reader.pages[page_num]
+                extracted_text += page.extract_text()
+            return jsonify({'extracted_text': extracted_text})
+        else:
+            return jsonify({'error': 'Failed to download PDF from the provided URL'})
+
+    except Exception as e:
+        return jsonify({'error': str(e)})
+
+
+@app.route('/search_news', methods=['POST'])  # Change 'GET' to 'POST'
+def search_news():
+    # Get the URL and data to search for from the request's JSON data
+    data = request.get_json()
+
+    url = data.get('url')
+    search_term = data.get('data')
+
+    if url is None:
+        return jsonify({"error": "URL parameter is missing."})
+
+    if search_term is None:
+        return jsonify({"error": "Data parameter is missing."})
+
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Check for request errors
+
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        # Search for the presence of the specified data in <a> elements
+        links = soup.find_all("a")
+        news_paragraphs = [link.get_text() for link in links if search_term in link.get_text()]
+
+        if news_paragraphs:
+            return jsonify({"news_paragraphs": news_paragraphs})
+        else:
+            return jsonify({"message": "No news paragraphs found."})
+    except requests.exceptions.RequestException as e:
+        return jsonify({"error": f"Failed to retrieve the web page: {str(e)}"})
+
+if __name__ == '__main__':
+    app.run(debug=True)
+
+