Skip to content

Commit

Permalink
add "sub topic" enum kind for topics
Browse files Browse the repository at this point in the history
  • Loading branch information
AltriusRS committed Jan 19, 2024
1 parent 3e97507 commit 5d88333
Show file tree
Hide file tree
Showing 11 changed files with 655 additions and 396 deletions.
Empty file modified .github/workflows/docker-image.yml
100644 → 100755
Empty file.
1 change: 1 addition & 0 deletions .gitignore
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,4 @@ dist
.yarn/install-state.gz
.pnp.*

stamps
Empty file modified Dockerfile
100644 → 100755
Empty file.
1 change: 0 additions & 1 deletion README.md
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ The general workflow the app follows is as such:

1) Pull episode data from [WhenPlane](https://whenplane.com)
2) Process all episodes, updating existing entries with the newest data, and inserting new entries where required
3)



2 changes: 1 addition & 1 deletion package-lock.json
100644 → 100755

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Empty file modified package.json
100644 → 100755
Empty file.
278 changes: 278 additions & 0 deletions src/helpers/parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
// Matches a colon-separated timestamp built from 1-2 digit groups, e.g. `12:34` or `1:23:45`.
// At least one colon is required. NOTE(review): the last group allows an optional
// trailing colon, so a match may end in `:`.
const TIMESTAMP_EXTRACTOR = /((\d{1,2}:{0,1}){1,3}:(\d{1,2}:{0,1}))/;
// Matches the marker `[cont.]`, case-insensitively.
// NOTE(review): the `.` is unescaped, so any single character is accepted in that
// position (e.g. `[contX]`) — confirm whether `\.` was intended.
const CONTINUANCE_EXTRACTOR = /\[cont.\]/i;
// Matches a square-bracketed run of digits and colons such as `[1:23:45]`.
// NOTE(review): every digit and colon is optional, so an empty `[]` appears to match too.
const MAINLINE_EXTRACTOR = /\[(?:(?:\d?){1,2}(?::{0,1}?)){2,3}\]/;
// Matches `topic #<number>: <title>*` (the `#` and colons are optional), case-insensitively.
// Group 1 is the topic number, group 2 the title; `.+` is greedy, so the title runs
// up to the last `*` on the line.
const TOPIC_EXTRACTOR = /topic\s#{0,1}(\d+)+:*\s(.+)\*/i;
// Matches 1-3 whitespace characters, `>`, one whitespace character, then captures the
// rest of the line. The pattern is unanchored, so deeper indentation also matches.
const TANGENT_EXTRACTOR = /\s{1,3}>\s(.+)/;
// Matches an asterisk-delimited sponsor header such as `*Sponsor Spots*`, optionally
// followed by `ft.`/`feat.`/`featuring` and `dennis`, case-insensitively.
const SPONSOR_EXTRACTOR =
/\*(?:sponsors?)\s?(?:spots?)?\s?(?:ft\.?|feat\.?|featuring)?\s?(?:dennis)?.*\*/i;
// Matches `merch messages #<number> …*` (the `#` is optional), case-insensitively.
// Group 1 is the number.
const MERCH_MESSAGE_EXTRACTOR = /merch\smessages\s#{0,1}(\d+)+(?:.+)\*/i;
// Fallback: captures whatever sits between the first and the last `*` on the line.
const UNKNOWN_TYPE_EXTRACTOR = /\*(.*)\*/i;

// Header titles that never become topics. Each base name is listed twice:
// first with a trailing period, then without.
const IGNORE_HEADERS: string[] = [];
for (const baseName of ["Chapters", "Intro", "Outro"]) {
  IGNORE_HEADERS.push(`${baseName}.`, baseName);
}

/**
 * One entry of an episode's chapter list, optionally carrying nested entries.
 */
export type Topic = {
  /** Identifier of the entry; optional. */
  id?: string;
  /** Identifier of the episode this entry belongs to. */
  episodeId: string;
  /** Identifier of the parent entry, when there is one. */
  parent?: string;
  /** Human-readable title. */
  title: string;
  /** Start offset. NOTE(review): the parser in this file stores seconds — confirm for other producers. */
  start: number;
  /** End offset, in the same unit as `start`. */
  end: number;
  /** When the record was created. */
  created: Date;
  /** When the record was last modified. */
  modified: Date;
  /** Optional reference string. NOTE(review): meaning is not visible here — confirm. */
  ref?: string;
  /** Category of the entry. */
  kind: "topic" | "merch message" | "sponsor" | "tangent" | "sub topic";
  /** Nested entries, when any. */
  children?: Array<Topic>;
};

/** A header line, tagged by the extractor that recognised it. */
type ParsedHeader =
  | { type: "topic"; rawId: string; title: string }
  | { type: "sponsor" }
  | { type: "merch"; rawId: string }
  | { type: "other"; title: string };

/** Classifies a header line: topic first, then sponsor, then merch, then the generic `*…*` form. */
function readHeader(line: string): ParsedHeader {
  const topic = TOPIC_EXTRACTOR.exec(line);
  if (topic) return { type: "topic", rawId: topic[1], title: topic[2] };

  if (SPONSOR_EXTRACTOR.test(line)) return { type: "sponsor" };

  const merch = MERCH_MESSAGE_EXTRACTOR.exec(line);
  if (merch) return { type: "merch", rawId: merch[1] };

  // A header without any `*…*` text used to yield `title: undefined`; use the same
  // "Unknown" placeholder the tangent branch already uses for missing titles.
  const generic = UNKNOWN_TYPE_EXTRACTOR.exec(line);
  return { type: "other", title: generic?.[1] ?? "Unknown" };
}

/** Builds a record with an empty `episodeId`, `end` of 0 and fresh `created`/`modified` dates. */
function newTopic(
  id: string,
  title: string,
  start: number,
  kind: Topic["kind"],
  withChildren: boolean
): Topic {
  const topic: Topic = {
    id,
    episodeId: "",
    title,
    start,
    end: 0,
    created: new Date(),
    modified: new Date(),
    kind,
  };
  // Only top-level entries carry a `children` list; nested entries omit the key.
  if (withChildren) topic.children = [];
  return topic;
}

/** Converts the first timestamp found on the line to seconds; lines without one yield 0. */
function timestampOf(line: string): number {
  return fromHumanReadable(TIMESTAMP_EXTRACTOR.exec(line)?.[0] || "00:00");
}

/**
 * Parses a timestamped chapter listing into a list of topics.
 *
 * The text is processed line by line:
 * - a line matching `MAINLINE_EXTRACTOR` (a bracketed timestamp) opens a new top-level
 *   entry starting at that timestamp;
 * - a line matching `CONTINUANCE_EXTRACTOR` opens a top-level entry without a timestamp
 *   (`start` is 0 until it can be derived from what follows);
 * - a line matching `TANGENT_EXTRACTOR` becomes a child of the most recent top-level
 *   entry; its kind follows the current section (sponsor, merch message, or sub topic).
 *
 * Headers whose generic title is listed in `IGNORE_HEADERS` are dropped. Other lines
 * are ignored.
 *
 * After all lines are read, every entry's `end` is set to one less than the `start` of
 * the entry that follows it. The last top-level entry and its last child keep `end` at 0,
 * because nothing follows them.
 *
 * @param text Full listing, with lines separated by `\n`.
 * @returns The top-level entries in document order, each with its children.
 */
export function parseDocument(text: string): Topic[] {
  const topics: Topic[] = [];
  // Next revision number to use for each continued topic number.
  const continuances = new Map<number, number>();
  // Section the most recent header opened; decides the kind of following children.
  let section: "none" | "sponsor" | "merch" = "none";
  // True while a continuation header is still waiting for a start time.
  let wasContinued = false;

  for (const line of text.split("\n")) {
    const isMainline = MAINLINE_EXTRACTOR.test(line);

    if (isMainline || CONTINUANCE_EXTRACTOR.test(line)) {
      // Continuation headers carry no usable timestamp of their own.
      const start = isMainline ? timestampOf(line) : 0;
      if (!isMainline) wasContinued = true;

      const header = readHeader(line);
      let topic: Topic;
      switch (header.type) {
        case "topic": {
          section = "none";
          if (isMainline) {
            topic = newTopic(header.rawId, header.title, start, "topic", true);
          } else {
            // Continuations of the same topic number get ids `<n>.1`, `<n>.2`, …
            const key = parseInt(header.rawId, 10);
            const revision = continuances.get(key) ?? 1;
            continuances.set(key, revision + 1);
            topic = newTopic(
              `${header.rawId}.${revision}`,
              "Cont. " + header.title,
              start,
              "topic",
              true
            );
          }
          break;
        }
        case "sponsor":
          section = "sponsor";
          topic = newTopic("sponsors", "Sponsor Spots", start, "sponsor", true);
          break;
        case "merch":
          section = "merch";
          topic = newTopic(
            "merch_messages - " + header.rawId,
            "Merch Messages",
            start,
            "merch message",
            true
          );
          break;
        default:
          section = "none";
          // Ignored headers produce nothing and leave a pending continuation untouched.
          if (IGNORE_HEADERS.includes(header.title)) continue;
          topic = newTopic(
            "unknown-" + Date.now(),
            header.title,
            start,
            isMainline ? "topic" : "tangent",
            true
          );
      }
      topics.push(topic);

      if (isMainline && wasContinued) {
        // A continuation that never received a start borrows the start of the header
        // that follows it. The previous entry may not exist (e.g. the continuation
        // header was ignored), so guard the lookup.
        const previous = topics[topics.length - 2];
        if (previous !== undefined && !(previous.start > 0)) {
          previous.start = topic.start;
        }
        // Always clear the flag so it cannot leak onto unrelated later lines.
        wasContinued = false;
      }
      continue;
    }

    if (!TANGENT_EXTRACTOR.test(line)) continue;

    // A child line before any header has nothing to attach to; skip it instead of throwing.
    const parent = topics[topics.length - 1];
    if (parent === undefined) continue;

    const start = timestampOf(line);
    // `split` with a capturing pattern also returns the captures; the last piece is the
    // text after the timestamp (or the whole line when no timestamp is present).
    const title = line.split(TIMESTAMP_EXTRACTOR).pop() || "Unknown";
    const kind: Topic["kind"] =
      section === "sponsor"
        ? "sponsor"
        : section === "merch"
          ? "merch message"
          : "sub topic";
    parent.children?.push(newTopic("", title, start, kind, false));

    if (wasContinued) {
      // The continued entry starts where its first child starts. The entry before it
      // keeps its own start.
      if (!(parent.start > 0)) parent.start = start;
      wasContinued = false;
    }
  }

  topics.forEach((topic, index) => {
    const next: Topic | undefined = topics[index + 1];
    if (next !== undefined) {
      // An entry still without a start takes the start of its first child, if any.
      const firstChild = next.children?.[0];
      if (next.start === 0 && firstChild !== undefined) {
        next.start = firstChild.start;
      }
      topic.end = next.start - 1;
    }

    topic.children?.forEach((child, childIndex, siblings) => {
      const sibling: Topic | undefined = siblings[childIndex + 1];
      if (sibling !== undefined) {
        child.end = sibling.start - 1;
      } else if (next !== undefined) {
        // The last child runs until the next top-level entry begins.
        child.end = next.start - 1;
      }
    });
  });

  return topics;
}

/**
 * Converts a colon-separated timestamp into a total number of seconds.
 *
 * Fields are read from the right: seconds, minutes, hours, days. Missing or empty
 * fields contribute nothing, and fields beyond the fourth are ignored.
 *
 * @param timestamp Text such as `"05:30"` or `"1:02:03"`.
 * @returns The total in seconds; `NaN` if a non-empty field is not numeric.
 */
function fromHumanReadable(timestamp: string): number {
  // Seconds per unit, ordered from the rightmost field to the leftmost.
  const secondsPerUnit = [1, 60, 60 * 60, 24 * 60 * 60];
  const fields = timestamp.split(":");

  let total = 0;
  for (let unit = 0; unit < secondsPerUnit.length; unit++) {
    const field = fields[fields.length - 1 - unit];
    if (!field) continue;
    total += parseInt(field) * secondsPerUnit[unit];
  }

  return total;
}
18 changes: 18 additions & 0 deletions src/helpers/sponsors.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
 * Returns the `company` of the first sponsor whose `regex` matches `text`.
 *
 * @param text Text to search.
 * @param sponsors Entries exposing a `regex` (anything with a `test` method) and a
 *   `company` value; they are checked in order.
 * @returns The matching entry's `company`, or `undefined` when nothing matches.
 */
export function resolveSponsor(
  text: string,
  sponsors: any[]
): string | undefined {
  for (const { regex, company } of sponsors) {
    // A RegExp with the `g` or `y` flag resumes from `lastIndex` on every `test` call,
    // so a pattern reused across calls could miss a match. Always start from the beginning.
    if (regex instanceof RegExp) regex.lastIndex = 0;

    if (regex.test(text)) return company;
  }

  return undefined;
}

// async function addSponsorSpot();
53 changes: 53 additions & 0 deletions src/helpers/topics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { timestamp } from "drizzle-orm/mysql-core";
import { Topic } from "./parser";
import { ChangeStatus, changelog, changes, comments, topics } from "datakit";

/**
 * A proposed change, in the shape that `proposeChange` inserts into the `changes` table.
 */
export type Changelog = {
  /** Identifier of the changelog this change is filed under. */
  changelogId: string;
  /** Current status of the change. */
  status: ChangeStatus;
  /** When the change was added. */
  added: Date;
  /** When the change was last modified. */
  modified: Date;
  /** Identifier of the author. */
  authorId: string;
  /** Proposed title. */
  title: string;
  /** Proposed start offset. NOTE(review): presumably seconds, like `Topic.start` — confirm. */
  start: number;
  /** Proposed end offset, in the same unit as `start`. */
  end: number;
};

/**
 * @deprecated Misspelled name, kept so existing imports keep compiling. Use {@link Changelog}.
 */
export type Chaneglog = Changelog;

/**
 * Inserts `topic` into the `topics` table and returns the `id` of the first row
 * reported back by the insert.
 *
 * @param client Object exposing the query builder under `client.data`.
 * @param topic Record to insert.
 * @returns The `id` of the first returned row.
 */
export async function addTopic(client: any, topic: Topic): Promise<string> {
  const insertedRows = await client.data
    .insert(topics)
    .values(topic)
    .returning();
  const [firstRow] = insertedRows;

  return firstRow.id;
}

/**
 * Inserts a row into the `changelog` table for the given topic, with `started` set
 * to the current time.
 *
 * @param client Object exposing the query builder under `client.data`.
 * @param references Identifier stored as the row's `topicId`.
 */
export async function addTopicChangelog(
  client: any,
  references: string
): Promise<void> {
  const entry = { topicId: references, started: new Date() };
  await client.data.insert(changelog).values(entry);
}

/**
 * Inserts `log` into the `changes` table and returns the `id` of the first row
 * reported back by the insert.
 *
 * @param client Object exposing the query builder under `client.data`.
 * @param log Change record to insert.
 * @returns The `id` of the first returned row.
 */
export async function proposeChange(
  client: any,
  log: Chaneglog
): Promise<string> {
  const insertedRows = await client.data
    .insert(changes)
    .values(log)
    .returning();
  const [firstRow] = insertedRows;

  return firstRow.id;
}

/**
 * Inserts a row into the `comments` table.
 *
 * @param client Object exposing the query builder under `client.data`.
 * @param changelog Identifier stored as the row's `changeId`.
 * @param author Identifier stored as the row's `authorId`.
 * @param message Comment text.
 */
export async function addComment(
  client: any,
  changelog: string,
  author: string,
  message: string
): Promise<void> {
  const row = { changeId: changelog, message, authorId: author };
  await client.data.insert(comments).values(row);
}
Loading

0 comments on commit 5d88333

Please sign in to comment.