Fixed bug for empty result list in Google Ngram function

9537e0c2 · jmzk96 · bc7af70e · 9537e0c2
Commit 9537e0c2 authored 3 years ago by jmzk96
--- a/src/create_corpus.py
+++ b/src/create_corpus.py
@@ -193,15 +193,20 @@ def get_google_ngram_occurences(query:str,corpus:str,start_year:int,end_year:int
        corpus = "eng_us_2012"
    url = f"https://books.google.com/ngrams/json?content={query}&year_start={str(start_year)}&year_end={str(end_year)}&corpus={str(corpora[corpus])}&smoothing={str(smoothing)}&case_insensitive={str(case_insensitive)}"
    response = requests.get(url)
+    print(response.content)
    if response.ok:
        results = json.loads(response.content)
-        if specific_year and start_year <= specific_year <= end_year:
-            index = [i for i in range(end_year-start_year+1)].index(specific_year)
-            specified_year = results["timeseries"][index]
+        if results and specific_year and start_year <= specific_year <= end_year:
+            index = [i for i in range(start_year,end_year+1)].index(specific_year)
+            specified_year = results[0]["timeseries"][index]
            return specified_year
        else:
-            log.info("No specific year chosen")
-            return results["timeseries"]
+            if len(results) == 0:
+                log.info("No record or results")
+                return None
+            else:
+                log.info("No specific year chosen or wrong year order")
+                return results
    else:
        log.info("No response found for query")
        return None
@@ -236,5 +241,5 @@ if __name__ == "__main__":
        # threshold = 10
        # print(f"threshold={threshold}")
        # trim_corpus(n, threshold)
-
-    create_POS_corpus(3)
\ No newline at end of file
+    print(get_google_ngram_occurences( "I lovess Jesus","eng_2012",2002,2003,2003))
+    # create_POS_corpus(3)
\ No newline at end of file