From e84096b5dcc6a9d36a4b46819204d3b5ab1a5f6a Mon Sep 17 00:00:00 2001 From: Tilman Vatteroth Date: Sun, 11 Jun 2023 12:56:27 +0200 Subject: [PATCH] feat: generate metadata from content on revision creation Signed-off-by: Tilman Vatteroth --- backend/package.json | 2 + backend/src/revisions/revisions.service.ts | 35 ++++++-- ...act-revision-metadata-from-content.spec.ts | 77 ++++++++++++++++ .../extract-revision-metadata-from-content.ts | 87 +++++++++++++++++++ yarn.lock | 2 + 5 files changed, 198 insertions(+), 5 deletions(-) create mode 100644 backend/src/revisions/utils/extract-revision-metadata-from-content.spec.ts create mode 100644 backend/src/revisions/utils/extract-revision-metadata-from-content.ts diff --git a/backend/package.json b/backend/package.json index fda5cc20e..53e0feb73 100644 --- a/backend/package.json +++ b/backend/package.json @@ -56,8 +56,10 @@ "diff": "5.1.0", "express-session": "1.17.3", "file-type": "16.5.4", + "htmlparser2": "9.0.0", "joi": "17.9.2", "ldapauth-fork": "5.0.5", + "markdown-it": "13.0.1", "minio": "7.1.1", "mysql": "2.18.1", "node-fetch": "2.6.11", diff --git a/backend/src/revisions/revisions.service.ts b/backend/src/revisions/revisions.service.ts index c853f64d1..93bbddfe9 100644 --- a/backend/src/revisions/revisions.service.ts +++ b/backend/src/revisions/revisions.service.ts @@ -11,10 +11,12 @@ import { Repository } from 'typeorm'; import { NotInDBError } from '../errors/errors'; import { ConsoleLoggerService } from '../logger/console-logger.service'; import { Note } from '../notes/note.entity'; +import { Tag } from '../notes/tag.entity'; import { EditService } from './edit.service'; import { RevisionMetadataDto } from './revision-metadata.dto'; import { RevisionDto } from './revision.dto'; import { Revision } from './revision.entity'; +import { extractRevisionMetadataFromContent } from './utils/extract-revision-metadata-from-content'; class RevisionUserInfo { usernames: string[]; @@ -121,6 +123,9 @@ export class 
RevisionsService { createdAt: revision.createdAt, authorUsernames: revisionUserInfo.usernames, anonymousAuthorCount: revisionUserInfo.anonymousUserCount, + title: revision.title, + description: revision.description, + tags: (await revision.tags).map((tag) => tag.name), }; } @@ -131,6 +136,9 @@ export class RevisionsService { content: revision.content, length: revision.length, createdAt: revision.createdAt, + title: revision.title, + tags: (await revision.tags).map((tag) => tag.name), + description: revision.description, authorUsernames: revisionUserInfo.usernames, anonymousAuthorCount: revisionUserInfo.anonymousUserCount, patch: revision.patch, @@ -147,18 +155,35 @@ export class RevisionsService { newContent: string, yjsStateVector?: number[], ): Promise { - // TODO: Save metadata - const latestRevision = await this.getLatestRevision(note); - const oldContent = latestRevision.content; + const latestRevision = + note.id === undefined ? undefined : await this.getLatestRevision(note); + const oldContent = latestRevision?.content; if (oldContent === newContent) { return undefined; } const patch = createPatch( note.publicId, - latestRevision.content, + latestRevision?.content ?? '', newContent, ); - const revision = Revision.create(newContent, patch, note, yjsStateVector); + const { title, description, tags } = + extractRevisionMetadataFromContent(newContent); + + const tagEntities = tags.map((tagName) => { + const entity = new Tag(); + entity.name = tagName; + return entity; + }); + + const revision = Revision.create( + newContent, + patch, + note, + yjsStateVector ?? 
null, + title, + description, + tagEntities, + ) as Revision; return await this.revisionRepository.save(revision); } } diff --git a/backend/src/revisions/utils/extract-revision-metadata-from-content.spec.ts b/backend/src/revisions/utils/extract-revision-metadata-from-content.spec.ts new file mode 100644 index 000000000..742be7ee9 --- /dev/null +++ b/backend/src/revisions/utils/extract-revision-metadata-from-content.spec.ts @@ -0,0 +1,77 @@ +/* + * SPDX-FileCopyrightText: 2023 The HedgeDoc developers (see AUTHORS file) + * + * SPDX-License-Identifier: AGPL-3.0-only + */ +import { extractRevisionMetadataFromContent } from './extract-revision-metadata-from-content'; + +describe('revision entity', () => { + it('works without frontmatter without first heading', () => { + const { title, description, tags } = extractRevisionMetadataFromContent( + 'This is a note content', + ); + expect(title).toBe(''); + expect(description).toBe(''); + expect(tags).toStrictEqual([]); + }); + + it('works with broken frontmatter', () => { + const { title, description, tags } = extractRevisionMetadataFromContent( + '---\ntitle: \n - 1\n - 2\n---\nThis is a note content', + ); + + expect(title).toBe(''); + expect(description).toBe(''); + expect(tags).toStrictEqual([]); + }); + + it('works with frontmatter title', () => { + const { title, description, tags } = extractRevisionMetadataFromContent( + '---\ntitle: note title\n---\nThis is a note content', + ); + + expect(title).toBe('note title'); + expect(description).toBe(''); + expect(tags).toStrictEqual([]); + }); + + it('works with first heading title', () => { + const { title, description, tags } = extractRevisionMetadataFromContent( + '# Note Title Heading\nThis is a note content', + ); + + expect(title).toBe('Note Title Heading'); + expect(description).toBe(''); + expect(tags).toStrictEqual([]); + }); + + it('works with frontmatter description', () => { + const { title, description, tags } = extractRevisionMetadataFromContent( + 
'---\ndescription: note description\n---\nNote content', + ); + + expect(title).toBe(''); + expect(description).toBe('note description'); + expect(tags).toStrictEqual([]); + }); + + it('extracts tags as list', async () => { + const { title, description, tags } = extractRevisionMetadataFromContent( + '---\ntags: \n - tag1\n - tag2\n---\nNote content', + ); + + expect(title).toBe(''); + expect(description).toBe(''); + expect(tags).toStrictEqual(['tag1', 'tag2']); + }); + + it('extracts tags in legacy syntax', async () => { + const { title, description, tags } = extractRevisionMetadataFromContent( + '---\ntags: "tag1, tag2"\n---\nNote content', + ); + + expect(title).toBe(''); + expect(description).toBe(''); + expect(tags).toStrictEqual(['tag1', 'tag2']); + }); +}); diff --git a/backend/src/revisions/utils/extract-revision-metadata-from-content.ts b/backend/src/revisions/utils/extract-revision-metadata-from-content.ts new file mode 100644 index 000000000..1d8a9f1e5 --- /dev/null +++ b/backend/src/revisions/utils/extract-revision-metadata-from-content.ts @@ -0,0 +1,87 @@ +/* + * SPDX-FileCopyrightText: 2023 The HedgeDoc developers (see AUTHORS file) + * + * SPDX-License-Identifier: AGPL-3.0-only + */ +import { + convertRawFrontmatterToNoteFrontmatter, + defaultNoteFrontmatter, + extractFirstHeading, + extractFrontmatter, + generateNoteTitle, + NoteFrontmatter, + parseRawFrontmatterFromYaml, +} from '@hedgedoc/commons'; +import { parseDocument } from 'htmlparser2'; +import MarkdownIt from 'markdown-it'; + +interface FrontmatterExtractionResult { + title: string; + description: string; + tags: string[]; +} + +interface FrontmatterParserResult { + frontmatter: NoteFrontmatter; + firstLineOfContentIndex: number; +} + +/** + * Parses the frontmatter of the given content and extracts the metadata that are necessary to create a new revision. + * + * @param {string} content the revision content that contains the frontmatter. 
+ */ +export function extractRevisionMetadataFromContent( + content: string, +): FrontmatterExtractionResult { + const parserResult = parseFrontmatter(content); + const frontmatter = parserResult?.frontmatter; + const firstLineOfContentIndex = parserResult?.firstLineOfContentIndex; + + const title = generateNoteTitle(frontmatter, () => + extractFirstHeadingFromContent( + generateContentWithoutFrontmatter(firstLineOfContentIndex, content), + ), + ); + const description = frontmatter?.description ?? ''; + const tags = frontmatter?.tags ?? []; + + return { title, description, tags }; +} + +function generateContentWithoutFrontmatter( + firstLineOfContentIndex: number | undefined, + content: string, +): string { + return firstLineOfContentIndex === undefined + ? content + : content.split('\n').slice(firstLineOfContentIndex).join('\n'); +} + +function parseFrontmatter( + content: string, +): FrontmatterParserResult | undefined { + const extractionResult = extractFrontmatter(content.split('\n')); + const rawText = extractionResult?.rawText; + if (!rawText) { + return undefined; + } + + const firstLineOfContentIndex = extractionResult.lineOffset + 1; + const rawDataValidation = parseRawFrontmatterFromYaml(rawText); + const noteFrontmatter = + rawDataValidation.error !== undefined + ? 
defaultNoteFrontmatter + : convertRawFrontmatterToNoteFrontmatter(rawDataValidation.value); + return { + frontmatter: noteFrontmatter, + firstLineOfContentIndex: firstLineOfContentIndex, + }; +} + +function extractFirstHeadingFromContent(content: string): string | undefined { + const markdownIt = new MarkdownIt('default'); + const html = markdownIt.render(content); + const document = parseDocument(html); + return extractFirstHeading(document); +} diff --git a/yarn.lock b/yarn.lock index 0f35730a1..aa0f145d3 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2332,10 +2332,12 @@ __metadata: eslint-plugin-prettier: 4.2.1 express-session: 1.17.3 file-type: 16.5.4 + htmlparser2: 9.0.0 http-proxy-middleware: 2.0.6 jest: 29.5.0 joi: 17.9.2 ldapauth-fork: 5.0.5 + markdown-it: 13.0.1 minio: 7.1.1 mocked-env: 1.3.5 mysql: 2.18.1