Problem
the purpose of this code is to take a Taskwarrior (it’s a todo list application) server file that is mostly JSON and create unique ICS calendar events. The logic mostly works: I still see some duplicates that I can weed out, but doing that isn’t what I’m looking for in this review. The purpose of this code review is my coding style, readability, and efficiency. I’m new to python (but please be critical – I want to get better) so I’m sure I’m doing some strange things. This script is meant to be called from a Unix cron job.
#!/usr/bin/env python3
import os.path
from ics import Calendar, Event
import json
import sys
import os
import io
taskwarrior_formatted_data_location = "/var/taskd/tx.data"
ics_calendar_full_path = "/var/www/html/tasks.ics"
ics_calendar_full_path_chores = "/var/www/html/chores.ics"
ics_calendar_full_path_announcements = "/var/www/html/announcements.ics"
unique_list_of_calendar_tasks = []
valid_statuses_for_calendar = ['pending', 'waiting']
invalid_statuses_for_calendar = ['completed', 'deleted']
def create_uniqueness(task):
""" creates a definition of uniqueness from a task's attributes
input: task: an object with a Taskwarrior set of attributes
output: a string of the unique signature of this task """
if not task:
return None
if task.get('due') and task.get('description'):
return task['due'] + task['description']
else:
return task['uuid']
def is_unique_calendar_task(task):
""" if this task exists in the list of tasks to make a calendar already
input: task: an object with a Taskwarrior set of attributes
output: boolean - true if the task is unique, false if it already existed in the list """
if not task:
return None
if task['status'] in invalid_statuses_for_calendar:
return False
if task.get('status') and task['status'] in valid_statuses_for_calendar:
unique_task_id = create_uniqueness(task)
if unique_task_id in unique_list_of_calendar_tasks:
return False
unique_list_of_calendar_tasks.append(unique_task_id)
return True
return False
def create_task_calendar_description(task):
""" creates a custom description of the task for the calendar summary
input: task: an object with a Taskwarrior set of attributes
output: string to be used for the calendar event summary """
project = "{} {} {}".format("[", task['project'], "] ") if task.get('project') else ""
tags = " [" + ", ".join([k for k in task['tags'] if 'cal.' not in k]) + "]" if (task.get('tags') and [k for k in
task['tags'] if 'cal.' not in k]) else ""
return project + task['description'] + tags
def get_task_first_calendar(task):
""" find the first cal.<xyz> tag, which indicates which calendar this task should appear on. Defaults to the
general calendar
input: task: an object with a Taskwarrior set of attributes
output: string with the name of the calendar this event should go on """
if task.get('tags') is None:
return ""
cals = [s for s in task['tags'] if 'cal.' in s]
if not cals:
return ""
return cals[0].replace("cal.", "")
def get_unique_task():
""" read the JSON-like file, filtering out lines I don't need, and calling the unique function to create tasks to
be processed
input: none
output: yields a unique task """
real_lines = []
for line in io.open(taskwarrior_formatted_data_location, 'r', encoding='utf8'):
li = line.strip()
if li.startswith("{"):
real_lines.append(li)
lines_as_string = "[" + ",".join(real_lines) + "]"
for task in json.loads(lines_as_string):
if is_unique_calendar_task(task):
yield task
def get_task_start_date_for_event(task):
""" find the calendar event start date based on a hierarchy of which date to use
input: task: an object with a Taskwarrior set of attributes
output: date to use in Taskwarrior format """
if task is None:
return ""
if task.get('due'):
return task['due']
if task.get('scheduled'):
return task['scheduled']
if task.get('wait'):
return task['wait']
else:
return ""
if __name__ == "__main__":
general_cal = Calendar(creator="My TaskWarrior Calendar")
chores_cal = Calendar(creator="My TaskWarrior Chores Calendar")
ann_cal = Calendar(creator="My TaskWarrior Announcements Calendar")
for task in get_unique_task():
event_due = get_task_start_date_for_event(task)
if event_due in (None, ""):
continue
cal_event = Event()
cal_event.begin = event_due
cal_event.name = create_task_calendar_description(task)
task_first_calendar = get_task_first_calendar(task)
if task_first_calendar == "":
general_cal.events.append(cal_event)
if task_first_calendar == "chores":
chores_cal.events.append(cal_event)
if task_first_calendar == "announcements":
ann_cal.events.append(cal_event)
with open(os.path.expanduser(ics_calendar_full_path), 'w') as f:
f.writelines(general_cal)
with open(os.path.expanduser(ics_calendar_full_path_chores), 'w') as f:
f.writelines(chores_cal)
with open(os.path.expanduser(ics_calendar_full_path_announcements), 'w') as f:
f.writelines(ann_cal)
sys.exit(0)
Thanks in advance for your feedback.
Solution
- Do not mix standard library and third party imports.
- As you’re using Python 3 use function and variable annotations instead of defining types in docstrings. Docstrings are fine but they become outdated too soon. With annotation the advantage is that you can use static analyzers like Mypy to crosscheck.
unique_list_of_calendar_tasks = []
:unique_list
sounds like a set.io.open
is not required in Python 3. The builtinopen()
is fine unless you want to write Python 2/3 compatible code.- Do not maintain unique tasks in a global variable, you can easily move it under
get_unique_tasks
. - You seem to be doing a lots of processing related to a task, perhaps create a
Task
class that encapsulates these methods.
from typing import Any, Dict, Iterator
class Task:
def __init__(self, data: Dict[Any, Any]) -> None:
self.data = data
@property
def id(task) -> str:
if task.get('due') and task.get('description'):
return task['due'] + task['description']
return task['uuid']
@property
def is_valid(self) -> bool:
if self.data['status'] in invalid_statuses_for_calendar:
return False
if not (self.data.get('status') or self.data['status'] in valid_statuses_for_calendar):
return False
return True
@property
def first_calendar(self) -> str:
if self.data.get('tags') is None:
return ""
cal = next((s for s in self.data['tags'] if 'cal.' in s), None)
if cal is None:
return ""
return cal.replace("cal.", "")
@property
def start_date_for_event(self) -> str:
if self.data.get('due'):
return self.data['due']
if self.data.get('scheduled'):
return self.data['scheduled']
if self.data.get('wait'):
return self.data['wait']
else:
return ""
def __str__(self):
project = "{} {} {}".format("[", self.data['project'], "] ") if self.data.get('project') else ""
tags = " [" + ", ".join([k for k in self.data['tags'] if 'cal.' not in k]) + "]" if (self.data.get('tags') and [k for k in
self.data['tags'] if 'cal.' not in k]) else ""
return project + self.data['description'] + tags
def __eq__(self, other):
if isinstance(other, Task):
return self.id == other.id
return NotImplemented
def __hash__(self):
return hash(self.id)
def get_unique_tasks() -> Iterator[Dict[Any, Any]]:
real_lines = []
for line in open(taskwarrior_formatted_data_location, 'r', encoding='utf8'):
li = line.strip()
if li.startswith("{"):
real_lines.append(li)
lines_as_string = "[" + ",".join(real_lines) + "]"
tasks = set()
for row in json.loads(lines_as_string):
task = Task(row)
if task not in tasks and task.is_valid:
yield task
tasks.add(task)
if __name__ == "__main__":
general_cal = Calendar(creator="My TaskWarrior Calendar")
chores_cal = Calendar(creator="My TaskWarrior Chores Calendar")
ann_cal = Calendar(creator="My TaskWarrior Announcements Calendar")
for task in get_unique_task():
event_due = task.start_date_for_event
if not event_due:
continue
cal_event = Event()
cal_event.begin = event_due
cal_event.name = str(task)
task_first_calendar = task.first_calendar
if task_first_calendar == "":
general_cal.events.append(cal_event)
if task_first_calendar == "chores":
chores_cal.events.append(cal_event)
if task_first_calendar == "announcements":
ann_cal.events.append(cal_event)
with open(os.path.expanduser(ics_calendar_full_path), 'w') as f:
f.writelines(general_cal)
with open(os.path.expanduser(ics_calendar_full_path_chores), 'w') as f:
f.writelines(chores_cal)
with open(os.path.expanduser(ics_calendar_full_path_announcements), 'w') as f:
f.writelines(ann_cal)
sys.exit(0)
-
In the above code
Task
is now a class whose instances are also hashable, hence you can also store unique tasks in a set. -
create_task_calendar_description
is now the__str__
representation of the task. create_uniqueness
has been replaced withid
property.is_unique_calendar_task
has been removed and instead we use task’sid
andid_valid
properties to identify whether we want to process it or not.- We are no longer maintaining unique tasks in a global list, instead we are now using a set in
get_unique_tasks
to keep track of already processed(and valid) tasks. You could also change the logic here to just store just the task’sid
in set.