# Flask service: /ocr (image OCR), /AI (echo), /pdf (PDF text extraction),
# /search_news (scrape <a> tags for a search term).
|
||||||
|
from io import BytesIO
|
||||||
|
import io
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
from flask_cors import CORS
|
||||||
|
import pytesseract
|
||||||
|
from PIL import Image
|
||||||
|
import PyPDF2
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Flask application with CORS enabled app-wide (all origins, all routes).
app = Flask(__name__)
CORS(app)

# Point pytesseract at the Tesseract binary.
# NOTE(review): hard-coded Windows install path — fails on non-Windows hosts;
# consider reading from an environment variable instead.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' # Modify the path as needed
|
||||||
|
|
||||||
|
@app.route('/ocr', methods=['POST'])
def ocr_from_uploaded_image():
    """Run OCR on an image fetched from a URL.

    Expects a JSON body: {"image_url": "<http(s) URL of an image>"}.
    Returns JSON {"extracted_text": ...} on success, {"error": ...} on
    any failure (bad body, download error, unreadable image).
    """
    try:
        # silent=True: a missing/invalid JSON body yields None instead of
        # raising a BadRequest, so we can report a clear error message.
        payload = request.get_json(silent=True)
        if not payload or not payload.get('image_url'):
            return jsonify({'error': 'image_url is required'})

        # Fail fast on HTTP errors instead of handing an error page to PIL;
        # bound the download so a dead host cannot hang the worker.
        response = requests.get(payload['image_url'], timeout=30)
        response.raise_for_status()

        # Perform OCR on the downloaded image.
        image = Image.open(BytesIO(response.content))
        text = pytesseract.image_to_string(image)

        # Set CORS headers to allow all origins (flask_cors already does
        # this app-wide; kept for parity with the original endpoint).
        result = jsonify({'extracted_text': text})
        result.headers.add('Access-Control-Allow-Origin', '*')
        return result
    except Exception as e:
        # Endpoint-level boundary: report failures as JSON rather than
        # letting Flask emit an HTML 500 page.
        return jsonify({'error': str(e)})
|
||||||
|
|
||||||
|
@app.route('/AI', methods=['POST'])  # 'POST' uppercased for consistency with the other routes
def chat_AI():
    """Echo endpoint: return the posted JSON body back with HTTP 201.

    NOTE(review): despite the name, no AI model is called — this simply
    echoes the request payload (presumably a placeholder).
    """
    data = request.get_json()
    return jsonify(data), 201
|
||||||
|
|
||||||
|
@app.route('/pdf', methods=['POST'])
def extract_text():
    """Extract all page text from a PDF fetched from a URL.

    Expects a JSON body with the PDF location under "pdf_url" (preferred)
    or "image_url" (the key the original client sent; kept for backward
    compatibility). Returns {"extracted_text": ...} or {"error": ...}.
    """
    try:
        # silent=True avoids a BadRequest on a missing/invalid JSON body.
        data = request.get_json(silent=True) or {}
        pdf_url = data.get('pdf_url') or data.get('image_url')
        if not pdf_url:
            return jsonify({'error': 'pdf_url is required'})

        response = requests.get(pdf_url, timeout=30)
        if response.status_code == 200:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content))
            # extract_text() can return None for pages without a text
            # layer; substitute '' so concatenation never raises TypeError.
            extracted_text = ''.join(
                (page.extract_text() or '') for page in pdf_reader.pages
            )
            return jsonify({'extracted_text': extracted_text})
        else:
            return jsonify({'error': 'Failed to download PDF from the provided URL'})

    except Exception as e:
        # Endpoint-level boundary: surface any parsing/network failure as JSON.
        return jsonify({'error': str(e)})
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/search_news', methods=['POST'])
def search_news():
    """Scrape a page and return the text of <a> elements containing a term.

    Expects a JSON body: {"url": "<page to fetch>", "data": "<search term>"}.
    Returns {"news_paragraphs": [...]} when matches are found,
    {"message": ...} when none are, or {"error": ...} on bad input or a
    failed fetch.
    """
    # silent=True: request.get_json() returns None for a missing/invalid
    # JSON body; without the guard the .get() calls below would raise
    # AttributeError before the parameter checks ever ran.
    data = request.get_json(silent=True) or {}

    url = data.get('url')
    search_term = data.get('data')

    if url is None:
        return jsonify({"error": "URL parameter is missing."})

    if search_term is None:
        return jsonify({"error": "Data parameter is missing."})

    try:
        response = requests.get(url)
        response.raise_for_status()  # Check for request errors

        soup = BeautifulSoup(response.text, "html.parser")

        # Search for the presence of the specified term in <a> elements,
        # calling get_text() once per link instead of twice.
        link_texts = (link.get_text() for link in soup.find_all("a"))
        news_paragraphs = [text for text in link_texts if search_term in text]

        if news_paragraphs:
            return jsonify({"news_paragraphs": news_paragraphs})
        else:
            return jsonify({"message": "No news paragraphs found."})
    except requests.exceptions.RequestException as e:
        return jsonify({"error": f"Failed to retrieve the web page: {str(e)}"})
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Development server only — debug=True enables the interactive
    # debugger and must not be used in production.
    app.run(debug=True)
|
||||||
|
|
||||||
|
|