Skip to content
Snippets Groups Projects
Commit 7c099d04 authored by Naa's avatar Naa
Browse files

create functions for converting sentence index and n_gram index back and forth

parent 897d3902
No related branches found
No related tags found
No related merge requests found
......@@ -35,8 +35,9 @@ class GrammarChecker:
for i_error in sorted(i_errors):
error.append(n_grams[1][i_error][0])
print(f"Error: {' '.join(error)}")
self.suggest_correction(n_grams, i_errors)
def get_google_ngram_prob(self, n_gram):
def get_google_ngram_prob(self, n_gram, suggestion=False):
""" gets probability for given n_gram """
url = f"https://books.google.com/ngrams/json?content={' '.join(n_gram)}&case_insensitive=true"
successful = False
......@@ -49,6 +50,8 @@ class GrammarChecker:
results = json.loads(response.content)
if results:
max_prob = 0.0
if suggestion:
print(results)
for result in results:
cur_max_prob = max(results[0]["timeseries"])
max_prob = cur_max_prob if cur_max_prob > max_prob else max_prob
......@@ -66,7 +69,7 @@ class GrammarChecker:
def get_prob_of_n_gram(self, n_gram):
""" calculates probability of n_gram """
# smallest possible positive float (1e-324 == 0.0)
float_min = 1e-100
float_min = 1e-6
# float_min = 1e-323
# get n_gram probability
prob = self.get_google_ngram_prob(n_gram)
......@@ -78,6 +81,14 @@ class GrammarChecker:
word_indexes.append(word_indexes[-1]+1)
return word_indexes
def get_n_gram_indexes_from_word_index(self, n, n_gram_cnt, word_index):
    """ lists the indexes of the n_grams of size n that cover the word at word_index

    n           -- size of the n_grams
    n_gram_cnt  -- number of n_grams available; later indexes are cut off at this bound
    word_index  -- index of the word in the sentence
    returns a list of consecutive n_gram indexes (never empty)
    """
    # presumably n_gram i spans the words i .. i+n-1 -- verify against the n_gram builder
    if word_index < n:
        # word sits near the sentence start: the window begins at the first n_gram
        first, extra = 0, word_index % n
    else:
        first, extra = word_index - n + 1, n - 1
    # the first index is always reported; only the following ones are bounded by n_gram_cnt
    last = min(first + extra, n_gram_cnt - 1)
    return [first, *range(first + 1, last + 1)]
def find_index_of_error(self, n_grams):
""" finds index of greatest error in n_grams"""
# get probabilities for all n_grams
......@@ -106,9 +117,34 @@ class GrammarChecker:
if counter >= max_counter:
i_errors.append(index)
max_counter = counter
# over_threshold = True if (chained_probs)**(1/len(n_grams)) <= self.threshold else False
return i_errors
# return i_errors if markov[?] <= self.threshold else None
def suggest_correction(self, n_grams, i_errors):
print()
for i_error in i_errors:
print(f"i_error: {i_error}")
print(n_grams[1][i_error])
for n, grams in n_grams.items():
# skip unigrams
if n == 1:
continue
# create asterisk n_grams
n_gram_indexes = self.get_n_gram_indexes_from_word_index(n, len(grams), i_error)
for i, n_gram_index in enumerate(n_gram_indexes):
pos = i_error-i if i_error < n else n-1-i
tmp = list(grams[n_gram_index])
tmp[pos] = "*"
# create n_gram
construct = " ".join(tmp)
print(construct)
# get suggestions
# self.get_google_ngram_prob(construct, suggestion=True)
print()
print()
print()
assert False
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment