Stop Using Stop-Words When They Reverse Meaning

I recently wrote a post about stopwords and noticed that the title was getting distorted in the URL. To fix this, I wrote some code to strip out all stopwords and create a post with a more appropriate URL. This code also creates a folder to store the posts and generates a blog.md file with a list of all the posts. Read my blog post to learn more about how I fixed this issue!

Fixing Stopwords: How I Rescued My Post From Getting Mangled in the URL

By Michael Levin

Friday, April 29, 2022

Ugh, after all that stopwords stuff from the earlier post, I’m stripping it out. The meaning can get totally mangled. For example, the title:

Don't Obfuscate A Perfectly Fine Pythonic FizzBuzz

…became the URL:

https://pythonically.com/blog/obfuscate-perfectly-fine-pythonic-fizzbuzz/

Ugh! Unacceptable! Okay, so I stripped out all that stop_words stuff, and the code is now:

# export

import nltk
import argparse
from pathlib import Path
from dateutil import parser
from slugify import slugify


if hasattr(__builtins__, "__IPYTHON__"):
    from IPython.display import display, Markdown

    h1 = lambda text: display(Markdown(f"# {text}"))
    h2 = lambda text: display(Markdown(f"## {text}"))
    h3 = lambda text: display(Markdown(f"### {text}"))

    folder_name = "../pythonically"
    blog_title = "Pythonic Ally Blog"
    blog_slug = "blog"
else:
    h1 = lambda text: print(f"# {text}")
    h2 = lambda text: print(f"## {text}")
    h3 = lambda text: print(f"## {text}")

    aparser = argparse.ArgumentParser()
    add_arg = aparser.add_argument
    add_arg("-p", "--path", required=True)
    add_arg("-t", "--title", required=True)
    add_arg("-s", "--slug", required=True)
    args = aparser.parse_args()

    folder_name = args.path
    blog_title = args.title
    blog_slug = args.slug


index_front_matter = f"""---
layout: post
title: "{blog_title}"
slug: {blog_slug}
---

## [{blog_title} as One Page](/journal/)

"""

journal_path = f"{folder_name}/journal.md"
output_path = f"{folder_name}/_posts/"
slicer = "-" * 80

Path(output_path).mkdir(exist_ok=True)

dates = []
counter = -1
date_next = False
with open(journal_path, "r") as fh:
    for line in fh:
        line = line.rstrip()
        if date_next:
            adate = line[3:]
            date_next = False
            adatetime = parser.parse(adate).date()
            dates.append(adatetime)
            date_next = False
        if line == slicer:
            date_next = True
            counter = counter + 1
dates.reverse()

table = []
at_top = True
index_list = []
with open(journal_path, "r") as fh:
    for i, line in enumerate(fh):
        line = line.rstrip()
        if line == slicer:
            if at_top:
                at_top = False
                table = []
                continue
            adatetime = dates[counter - 1]
            filename = f"{output_path}{adatetime}-post-{counter}.md"
            h3(f"FILE: {filename}")
            with open(filename, "w") as fw:
                title = f"Post {counter}"
                slug = title
                if table[0] == slicer:
                    table = table[1:]
                maybe = table[1]
                has_title = False
                if table and maybe and maybe[0] == "#":
                    title = maybe[maybe.find(" ") + 1 :]
                    has_title = True
                slug = slugify(title)
                top = []
                top.append("---\n")
                top.append("layout: post\n")
                top.append(f'title: "{title}"\n')
                top.append(f"categories: {blog_slug}\n")
                top.append(f"slug: {slug}\n")
                link = f"- [{title}](/{blog_slug}/{slug}/)"
                index_list.append(link)
                top.append("---\n")
                top.append("\n")
                top_chop = 2
                if has_title:
                    top_chop = 3
                table = [f"{x}\n" for x in table[top_chop:]]
                table = top + table
                print("".join(table))
                fw.writelines(table)
            counter = counter - 1
            table = []
        table.append(line)

index_page = index_front_matter + "\n\n" + "\n".join(index_list)

with open(f"{folder_name}/blog.md", "w") as fh:
    fh.writelines(index_page)

Stop Using Stop-Words When They Reverse Meaning

Fixing Stopwords: How I Rescued My Post From Getting Mangled in the URL

By Michael Levin

Friday, April 29, 2022

Categories