mirror of
https://github.com/alexal1/Insomniac.git
synced 2026-04-26 10:05:50 +03:00
[GH-ISSUE #450] UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 10: invalid continuation byte #854
Labels
No labels
bug
duplicate
enhancement
pull-request
No milestone
No project
No assignees
1 participant
Notifications
Due date
No due date set.
Dependencies
No dependencies set.
Reference
starred/Insomniac#854
Loading…
Add table
Add a link
Reference in a new issue
No description provided.
Delete branch "%!s()"
Deleting a branch is permanent. Although the deleted branch may continue to exist for a short time before it actually gets removed, it CANNOT be undone in most cases. Continue?
Originally created by @martinxb on GitHub (Feb 16, 2023).
Original GitHub issue: https://github.com/alexal1/Insomniac/issues/450
I'm writing a basic Python GUI NLP tool that lets a lawyer compare a new case with old cases to find similar case files, which can help with research and with building the new case they are working on.
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
import gensim
import spacy
import os
import re
import difflib
# GUI
# Create the application's top-level Tk window.
window = tk.Tk()
# Caption shown in the window's title bar.
window.title("Legal NLP GUI")
# Initial window size, given to Tk as a "WIDTHxHEIGHT" geometry string.
window.geometry("400x400")
# Functions
def upload_file():
    """Ask the user for the new case file and show its path in ``entry_upload``.

    Opens a file-open dialog. When a file is chosen, the read-only entry is
    temporarily switched to editable, its content is replaced with the chosen
    path, and it is switched back to read-only. When nothing is chosen the
    entry is left untouched.
    """
    # Tk file-type patterns are glob-style: "*" matches every file, while a
    # bare "." does not, so the "All files" entry must use "*".
    file_types = (
        ("All files", "*"),
        ("Text files", "*.txt"),
        ("PDF files", "*.pdf"),
        ("Doc files", "*.doc"),
        ("CSV files", "*.csv"),
        ("HTML files", "*.html"),
        ("XLS files", "*.xls"),
        ("XLSX files", "*.xlsx"),
        ("JPEG files", "*.jpg"),
        ("JPEG files", "*.jpeg"),
        ("PNG files", "*.png"),
    )
    file_name = filedialog.askopenfilename(title="Select file", filetypes=file_types)
    if not file_name:
        # Nothing was selected (e.g. the dialog was dismissed).
        return

    # The entry is kept read-only for the user, so unlock it only while
    # the program rewrites its content.
    entry_upload.configure(state="normal")
    entry_upload.delete(0, tk.END)
    entry_upload.insert(0, file_name)
    entry_upload.configure(state="readonly")
def upload_folder():
    """Ask the user for the folder of old case files and show it in ``entry_folder``.

    Opens a directory-selection dialog. If a folder is chosen, the read-only
    entry is unlocked, overwritten with the chosen path, and locked again.
    If nothing is chosen the entry is left as it was.
    """
    chosen_dir = filedialog.askdirectory(title="Select folder")
    if not chosen_dir:
        return

    # Unlock the read-only entry just long enough to replace its text.
    entry_folder.configure(state="normal")
    entry_folder.delete(0, tk.END)
    entry_folder.insert(0, chosen_dir)
    entry_folder.configure(state="readonly")
# Allow the user to save the similar file to any directory they choose
def save_file():
    """Ask the user where to save the result and show the path in ``entry_save``.

    Opens a save-as dialog. When a path is chosen, the read-only entry is
    temporarily switched to editable, its content is replaced with the chosen
    path, and it is switched back to read-only. When nothing is chosen the
    entry is left untouched. This function only records the path; it does not
    write any file itself.
    """
    # Tk file-type patterns are glob-style: "*" matches every file, while a
    # bare "." does not, so the "All files" entry must use "*".
    file_types = (
        ("All files", "*"),
        ("Text files", "*.txt"),
        ("PDF files", "*.pdf"),
        ("Doc files", "*.doc"),
        ("CSV files", "*.csv"),
        ("HTML files", "*.html"),
        ("XLS files", "*.xls"),
        ("XLSX files", "*.xlsx"),
        ("JPG files", "*.jpg"),
        ("JPEG files", "*.jpeg"),
        ("PNG files", "*.png"),
    )
    save_path = filedialog.asksaveasfilename(title="Select file", filetypes=file_types)
    if not save_path:
        # Nothing was selected (e.g. the dialog was dismissed).
        return

    # The entry is kept read-only for the user, so unlock it only while
    # the program rewrites its content.
    entry_save.configure(state="normal")
    entry_save.delete(0, tk.END)
    entry_save.insert(0, save_path)
    entry_save.configure(state="readonly")
def compare():
    """Callback for the "Compare" button: fetch the two paths chosen in the GUI.

    Only the two lookups below are present in this paste. The traceback quoted
    later in the report points at a further statement inside this function
    (``folder_contents += f.read() + "\n"``) that is not shown here.
    """
    # Path of the new case file and of the folder holding the old case files,
    # as currently displayed in their entry widgets.
    file_path, folder_path = entry_upload.get(), entry_folder.get()
# Widgets
# --- Build the widgets (creation order kept as in the original script) ---

# Row 0: new case file picker.
label_upload = tk.Label(text="Select new case file:")
entry_upload = tk.Entry(state="readonly", width=30)
button_upload = tk.Button(command=upload_file, text="Browse")

# Row 1: old-cases folder picker plus the button that starts the comparison.
label_folder = tk.Label(text="Select folder with old case files:")
entry_folder = tk.Entry(state="readonly", width=30)
button_folder = tk.Button(command=upload_folder, text="Browse")
button_compare = tk.Button(command=compare, text="Compare")

# Row 2: destination path for the result.
label_save = tk.Label(text="Select a file path to save the compared file:")
entry_save = tk.Entry(state="readonly", width=30)
button_save = tk.Button(command=save_file, text="Save")

# Row 4: multi-line text area.
text_box = tk.Text(height=10)

# --- Place every widget in the grid: (widget, row, column) ---
for _widget, _row, _column in (
    (label_upload, 0, 0),
    (entry_upload, 0, 1),
    (button_upload, 0, 2),
    (label_folder, 1, 0),
    (entry_folder, 1, 1),
    (button_folder, 1, 2),
    (button_compare, 1, 3),
    (label_save, 2, 0),
    (entry_save, 2, 2),
    (button_save, 2, 3),
    (text_box, 4, 1),
):
    _widget.grid(row=_row, column=_column)
# Keep the module namespace identical to the original script.
del _widget, _row, _column

# Hand control to Tk's event loop; this call blocks until the window closes.
window.mainloop()
But when I click the Compare button, the code raises an error. Even after I changed the encoding of the files to UTF-8, it still gives me the error below:
C:\Users\user\PycharmProjects\pythonProject1\venv\Scripts\python.exe C:\Users\user\PycharmProjects\pythonProject1\main.py
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Users\user\AppData\Local\Programs\Python\Python311\Lib\tkinter\__init__.py", line 1948, in __call__
return self.func(*args)
^^^^^^^^^^^^^^^^
File "C:\Users\user\PycharmProjects\pythonProject1\main.py", line 61, in compare
folder_contents += f.read() + "\n"
^^^^^^^^
File "<frozen codecs>", line 322, in decode
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 10: invalid continuation byte