Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Statistical_Grammar_Checker
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
NLP_WS_2021
Statistical_Grammar_Checker
Commits
7c099d04
Commit
7c099d04
authored
3 years ago
by
Naa
Browse files
Options
Downloads
Patches
Plain Diff
create functions for converting sentence index and n_gram index back and forth
parent
897d3902
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/grammar_checker_google.py
+39
-3
39 additions, 3 deletions
src/grammar_checker_google.py
with
39 additions
and
3 deletions
src/grammar_checker_google.py
+
39
−
3
View file @
7c099d04
...
...
@@ -35,8 +35,9 @@ class GrammarChecker:
for
i_error
in
sorted
(
i_errors
):
error
.
append
(
n_grams
[
1
][
i_error
][
0
])
print
(
f
"
Error:
{
'
'
.
join
(
error
)
}
"
)
self
.
suggest_correction
(
n_grams
,
i_errors
)
def
get_google_ngram_prob
(
self
,
n_gram
):
def
get_google_ngram_prob
(
self
,
n_gram
,
suggestion
=
False
):
"""
gets probability for given n_gram
"""
url
=
f
"
https://books.google.com/ngrams/json?content=
{
'
'
.
join
(
n_gram
)
}
&case_insensitive=true
"
successful
=
False
...
...
@@ -49,6 +50,8 @@ class GrammarChecker:
results
=
json
.
loads
(
response
.
content
)
if
results
:
max_prob
=
0.0
if
suggestion
:
print
(
results
)
for
result
in
results
:
cur_max_prob
=
max
(
results
[
0
][
"
timeseries
"
])
max_prob
=
cur_max_prob
if
cur_max_prob
>
max_prob
else
max_prob
...
...
@@ -66,7 +69,7 @@ class GrammarChecker:
def
get_prob_of_n_gram
(
self
,
n_gram
):
"""
calculates probability of n_gram
"""
# smallest possible positive float (1e-324 == 0.0)
float_min
=
1e-
100
float_min
=
1e-
6
# float_min = 1e-323
# get n_gram probability
prob
=
self
.
get_google_ngram_prob
(
n_gram
)
...
...
@@ -78,6 +81,14 @@ class GrammarChecker:
word_indexes
.
append
(
word_indexes
[
-
1
]
+
1
)
return
word_indexes
def get_n_gram_indexes_from_word_index(self, n, n_gram_cnt, word_index):
    """
    converts a word index into a run of consecutive n_gram indexes

    The run starts at 0 when word_index < n, otherwise at word_index - n + 1,
    and extends up to word_index; indexes at or beyond n_gram_cnt are left out
    (the starting index itself is always kept).
    Presumably these are the n_grams that contain the word -- verify against
    how the n_grams are built.

    n: size of the n_grams
    n_gram_cnt: number of n_grams of that size
    word_index: index of the word
    returns: list of n_gram indexes in ascending order
    """
    if word_index < n:
        first, extra = 0, word_index % n
    else:
        first, extra = word_index - n + 1, n - 1
    # never step past the last existing n_gram
    last = min(first + extra, n_gram_cnt - 1)
    return [first, *range(first + 1, last + 1)]
def
find_index_of_error
(
self
,
n_grams
):
"""
finds index of greatest error in n_grams
"""
# get probabilities for all n_grams
...
...
@@ -106,9 +117,34 @@ class GrammarChecker:
if
counter
>=
max_counter
:
i_errors
.
append
(
index
)
max_counter
=
counter
# over_threshold = True if (chained_probs)**(1/len(n_grams)) <= self.threshold else False
return
i_errors
# return i_errors if markov[?] <= self.threshold else None
def suggest_correction(self, n_grams, i_errors):
    """
    prints, for every error index, the n_grams around it with the error word masked

    For each index in i_errors the index and the unigram at that index are
    printed first. Then, for every n_gram size other than 1, each n_gram
    returned by get_n_gram_indexes_from_word_index is printed with one
    position replaced by "*".

    n_grams: dict mapping n_gram size to a sequence of n_grams (key 1 is read directly)
    i_errors: iterable of word indexes
    raises: AssertionError, always, once all output has been printed
    """
    print()
    for i_error in i_errors:
        print(f"i_error: {i_error}")
        print(n_grams[1][i_error])
        # unigrams are skipped
        higher_orders = (
            (order, grams) for order, grams in n_grams.items() if order != 1
        )
        for order, grams in higher_orders:
            covering = self.get_n_gram_indexes_from_word_index(
                order, len(grams), i_error
            )
            # slot to mask in the first listed n_gram; it moves one
            # position to the left with every following n_gram
            first_slot = i_error if i_error < order else order - 1
            for offset, gram_index in enumerate(covering):
                masked = list(grams[gram_index])
                masked[first_slot - offset] = "*"
                print(" ".join(masked))
                # TODO: query suggestions for the masked n_gram, e.g. via
                # get_google_ngram_prob(..., suggestion=True)
            print()
        print()
    print()
    # NOTE(review): unconditional halt, presumably a debugging aid while this
    # method is unfinished -- confirm before removing
    assert False
if
__name__
==
"
__main__
"
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment