diff --git a/src/grammar_checker_google_jeremy.py b/src/grammar_checker_google_jeremy.py index 153e23a5e38e7e67d263b0c2d3e234cbfbab44d9..530a3c44f08c181873e6b978379bbf5b9a91c15a 100644 --- a/src/grammar_checker_google_jeremy.py +++ b/src/grammar_checker_google_jeremy.py @@ -6,6 +6,7 @@ import requests from pyinflect import getAllInflections, getInflection from nltk.stem import WordNetLemmatizer from nltk.corpus import wordnet as wn +import numpy as np from time import sleep @@ -142,9 +143,10 @@ class GrammarCheckerGoogle: probs = [self.float_min] chained_probs = math.prod(probs) diff_list = [(j-i) for i, j in zip(probs[:-1], probs[1:])] + logged_chained_prob = -np.log10((chained_probs)**(1/len(n_grams))) if x != 0 else (chained_probs)**(1/len(n_grams)) - if (chained_probs)**(1/len(n_grams)) <= self.threshold: + if logged_chained_prob <= self.threshold: return probs.index(min(probs)) else: return None \ No newline at end of file diff --git a/src/main.py b/src/main.py index 59e4e6d0be55a6664a63c3dcc1af777dfd645079..5dcc6ca4f25256f804752147ff8b3a782b1d4b85 100644 --- a/src/main.py +++ b/src/main.py @@ -1,7 +1,9 @@ +from re import L import wx import nltk from wx.richtext import RichTextCtrl from grammar_checker import GrammarChecker +from grammar_checker_google_jeremy import * class StatisticalGrammarChecker(wx.App): @@ -51,7 +53,9 @@ class Panel(wx.Panel): self.create_ui() # grammar checker - self.grammar_checker = GrammarChecker(n=3) + # self.grammar_checker = GrammarChecker(n=3) + self.grammar_checker2 = GrammarCheckerGoogle(2,1e-200, 1e-6) + self.grammar_checker3 = GrammarCheckerGoogle(3,1e-200, 1e-6) # FRONTEND def create_ui(self): @@ -76,18 +80,75 @@ class Panel(wx.Panel): # THREADS def grammar_check_thread(self, event): - output_text = "" + # output_text = "" # get input input_text = self.text_input.GetValue() # for each sentence - sentences = nltk.sent_tokenize(input_text) - for i, sentence in enumerate(sentences): - # grammar check - output_text += self.grammar_checker.check(sentence) - if i != len(sentences): - output_text += " " + + # sentences = nltk.sent_tokenize(input_text) + # for i, sentence in enumerate(sentences): + # # grammar check + # output_text += self.grammar_checker.check(sentence) + # if i != len(sentences): + # output_text += " " + + error_grammar_2 = self.grammar_checker2.check(input_text) + error_grammar_3 = self.grammar_checker3.check(input_text) + if error_grammar_2 is not None: + error_grammar_2_list = list(list(nltk.ngrams(self.grammar_checker2.tokenizer.tokenize(input_text),self.grammar_checker2.n))[error_grammar_2]) + elif error_grammar_2 is None: + error_grammar_2_list = None + if error_grammar_3 is not None: + error_grammar_3_list = list(list(nltk.ngrams(self.grammar_checker3.tokenizer.tokenize(input_text),self.grammar_checker3.n))[error_grammar_3]) + elif error_grammar_3 is None: + error_grammar_3_list = None + if (error_grammar_3_list is not None and error_grammar_2_list is not None) or (error_grammar_3_list is not None and error_grammar_2_list is None): + list_of_error = error_grammar_3_list + elif error_grammar_3_list is None and error_grammar_2_list is not None: + list_of_error = error_grammar_2_list + elif error_grammar_3_list is None and error_grammar_2_list is None: + list_of_error = None + output_text = "Nothing to declare" + + if list_of_error is not None: + if len(list_of_error) == 2: + list_error_pos_1 = [list_of_error[0]] if suggest_inflection(list_of_error[0].lower()) is None else suggest_inflection(list_of_error[0].lower()) + list_error_pos_2 = [list_of_error[1]] if suggest_inflection(list_of_error[1].lower()) is None else suggest_inflection(list_of_error[1].lower()) + list_of_suggestions =[] + for i in list_error_pos_1: + for j in list_error_pos_2: + list_of_suggestions.append((i,j)) + list_of_prob = [] + for i in list_of_suggestions: + prob = self.grammar_checker2.get_google_ngram_prob(i) if self.grammar_checker2.get_google_ngram_prob(i) is not None else 0.0 + list_of_prob.append(prob) + output_bigram = list(list_of_suggestions.index(max(list_of_prob))) + output_text = " ".join(output_bigram) + + elif len(list_of_error) == 3: + list_error_pos_1 = [list_of_error[0]] if suggest_inflection(list_of_error[0].lower()) is None else suggest_inflection(list_of_error[0].lower()) + list_error_pos_2 = [list_of_error[1]] if suggest_inflection(list_of_error[1].lower()) is None else suggest_inflection(list_of_error[1].lower()) + list_error_pos_3 = [list_of_error[2]] if suggest_inflection(list_of_error[2].lower()) is None else suggest_inflection(list_of_error[2].lower()) + list_of_suggestions =[] + for i in list_error_pos_1: + for j in list_error_pos_2: + for x in list_error_pos_3: + list_of_suggestions.append((i,j,x)) + list_of_prob = [] + for i in list_of_suggestions: + prob = self.grammar_checker3.get_google_ngram_prob(i) if self.grammar_checker3.get_google_ngram_prob(i) is not None else 0.0 + list_of_prob.append(prob) + output_trigram = list(list_of_suggestions.index(max(list_of_prob))) + output_text = " ".join(output_trigram) + else: + output_text = "Something's wrong :(" + elif list_of_error is None: + output_text = "Nothing to declare" + + + # set output text - self.text_output.SetValue(output_text) + self.text_output.SetValue(str(output_text)) # highlight errors #for error_location in text_error_locations: # self.text_output.SetStyle(error_location[0], error_location[1], wx.TextAttr(colText=wx.WHITE, colBack=wx.RED))