-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
117 lines (85 loc) · 4.05 KB
/
app.py
File metadata and controls
117 lines (85 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import json
import gradio as gr
from PIL import ImageOps
from tesseract_ocr import TesseractOCR
from GeminiClient import GeminiClient
# Module-level OCR engine and Gemini client, instantiated once at import time
# and shared by every call to process_image.
tess_ocr = TesseractOCR()
gemini_client = GeminiClient()
def process_image(image, threshold, flip_horizontal):
    """Run OCR on an uploaded image and build the three report strings.

    Args:
        image: The uploaded image (the UI passes a PIL image), or ``None``
            when nothing was uploaded.
        threshold: Confidence limit; words below it are sent to Gemini for
            correction and flagged in the reports.
        flip_horizontal: When true, the image is mirrored horizontally
            before OCR.

    Returns:
        A tuple ``(ocr_output, gemini_output, final_output)`` of formatted
        strings. When ``image`` is ``None`` the first element carries the
        "no image" message and the other two are empty.
    """
    import os
    import tempfile

    if image is None:
        return "Nenhuma imagem enviada", "", ""

    if flip_horizontal:
        image = ImageOps.mirror(image)

    # Use a private temporary directory per call: a fixed file name in the
    # working directory would be overwritten by concurrent requests and was
    # never cleaned up.
    # NOTE(review): assumes ocr_with_confidence reads the file eagerly and
    # returns a fully materialised result - confirm against TesseractOCR.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, "temp_image.png")
        image.save(temp_path)
        ocr_result = tess_ocr.ocr_with_confidence(temp_path)

    improved_result = gemini_client.improve_low_confidence_words(
        ocr_result, threshold=threshold)

    ocr_output = format_ocr_result(ocr_result, threshold)
    gemini_output = format_gemini_result(improved_result)
    final_output = format_final_text(ocr_result, improved_result, threshold)
    return ocr_output, gemini_output, final_output
def format_ocr_result(ocr_result, threshold):
    """Format the raw OCR result as a per-word confidence report.

    Args:
        ocr_result: Mapping with a ``'words'`` list; each word is a mapping
            with ``'text'`` and a numeric ``'confidence'``.
        threshold: Words with confidence greater than or equal to this value
            are tagged ``OK``; the rest are tagged ``BAIXA``.

    Returns:
        The report as a single string, one line per word after a header.
    """
    words = ocr_result['words']

    # Collect the pieces and join once instead of growing a string with +=.
    parts = [
        f"Total de palavras detectadas: {len(words)}\n\n",
        "Palavras reconhecidas:\n",
        "-" * 50 + "\n",
    ]
    for word in words:
        status = "OK" if word['confidence'] >= threshold else "BAIXA"
        parts.append(
            f"{word['text']:30} | Confianca: {word['confidence']:.1f}% [{status}]\n"
        )
    return "".join(parts)
def format_gemini_result(gemini_result):
    """Format the Gemini correction result as a human-readable report.

    Args:
        gemini_result: Mapping with a ``'status'`` key. Unless the status is
            ``'no_corrections_needed'`` it must also provide
            ``'total_words'``, ``'low_confidence_count'`` and a
            ``'corrections'`` list whose items have ``'original'``,
            ``'original_confidence'`` and ``'suggested'``.

    Returns:
        A fixed message when no corrections were needed, otherwise a summary
        header followed by one entry per suggested correction.

    Raises:
        KeyError: If a required key is missing from ``gemini_result``.
    """
    if gemini_result['status'] == 'no_corrections_needed':
        return "Nenhuma correcao necessaria. Todas as palavras tem confianca acima do limite."

    # NOTE(review): every other status is assumed to carry the full schema;
    # confirm what GeminiClient returns on failure.
    separator = "-" * 50 + "\n"

    # Collect the pieces and join once instead of growing a string with +=.
    parts = [
        f"Status: {gemini_result['status']}\n",
        f"Total de palavras: {gemini_result['total_words']}\n",
        f"Palavras com baixa confianca: {gemini_result['low_confidence_count']}\n\n",
        "Correcoes sugeridas pelo Gemini:\n",
        separator,
    ]
    for correction in gemini_result['corrections']:
        parts.append(
            f"Original: {correction['original']:20} ({correction['original_confidence']:.1f}%)\n"
        )
        parts.append(f"Sugerido: {correction['suggested']}\n")
        parts.append(separator)
    return "".join(parts)
def format_final_text(ocr_result, gemini_result, threshold):
    """Build the final text from trusted OCR words and Gemini corrections.

    Words whose confidence is at least ``threshold`` are kept as recognised.
    Other words are replaced by the Gemini suggestion for that word text when
    one exists, and kept unchanged otherwise.

    Args:
        ocr_result: Mapping with a ``'words'`` list; each word is a mapping
            with ``'text'`` and a numeric ``'confidence'``.
        gemini_result: Mapping with a ``'status'`` key; when the status is
            ``'corrections_made'`` its ``'corrections'`` list (items with
            ``'original'`` and ``'suggested'``) is applied.
        threshold: Confidence limit separating trusted words from candidates
            for correction.

    Returns:
        A string with the assembled text followed by summary counts.
    """
    corrections_map = {}
    if gemini_result['status'] == 'corrections_made':
        corrections_map = {
            correction['original']: correction['suggested']
            for correction in gemini_result['corrections']
        }

    final_words = []
    high_confidence_count = 0
    corrected_count = 0
    for word in ocr_result['words']:
        text = word['text']
        if word['confidence'] >= threshold:
            high_confidence_count += 1
            final_words.append(text)
        elif text in corrections_map:
            # Count substitutions actually applied: the size of the map is
            # wrong when a low-confidence word repeats or when a correction
            # matches no low-confidence word.
            corrected_count += 1
            final_words.append(corrections_map[text])
        else:
            final_words.append(text)

    separator = "=" * 50
    joined_text = " ".join(final_words)
    return (
        "Texto final (alta confianca + correcoes Gemini):\n"
        f"{separator}\n\n"
        f"{joined_text}"
        f"\n\n{separator}\n"
        f"Total de palavras: {len(final_words)}\n"
        f"Palavras de alta confianca: {high_confidence_count}\n"
        f"Palavras corrigidas pelo Gemini: {corrected_count}\n"
    )
# Gradio interface: inputs in the left column, the three result boxes in the
# right column, and a button that wires them to process_image.
with gr.Blocks(title="OCR com Gemini") as demo:
    gr.Markdown("# OCR com correcao via Gemini")
    gr.Markdown(
        "Faca upload de uma imagem para extrair texto e corrigir palavras com baixa confianca"
    )

    with gr.Row():
        # Input column: image upload plus processing options.
        with gr.Column():
            image_input = gr.Image(type="pil", label="Imagem")
            threshold_slider = gr.Slider(
                minimum=0,
                maximum=100,
                value=70,
                step=5,
                label="Limite de confianca (%)",
            )
            flip_checkbox = gr.Checkbox(
                label="Espelhar horizontalmente",
                value=False,
            )
            process_btn = gr.Button("Processar", variant="primary")

        # Output column: the final text is shown first, then the details.
        with gr.Column():
            final_output = gr.Textbox(label="Texto Final", lines=10)
            ocr_output = gr.Textbox(label="Resultado do OCR", lines=10)
            gemini_output = gr.Textbox(label="Correcoes do Gemini", lines=10)

    # The outputs list follows the order of process_image's return tuple,
    # not the on-screen order of the text boxes.
    process_btn.click(
        fn=process_image,
        inputs=[image_input, threshold_slider, flip_checkbox],
        outputs=[ocr_output, gemini_output, final_output],
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()