feat: generate metadata from content on revision creation

Signed-off-by: Tilman Vatteroth <git@tilmanvatteroth.de>
This commit is contained in:
Tilman Vatteroth 2023-06-11 12:56:27 +02:00
parent 90df9a4e32
commit e84096b5dc
5 changed files with 198 additions and 5 deletions

View file

@ -56,8 +56,10 @@
"diff": "5.1.0", "diff": "5.1.0",
"express-session": "1.17.3", "express-session": "1.17.3",
"file-type": "16.5.4", "file-type": "16.5.4",
"htmlparser2": "9.0.0",
"joi": "17.9.2", "joi": "17.9.2",
"ldapauth-fork": "5.0.5", "ldapauth-fork": "5.0.5",
"markdown-it": "13.0.1",
"minio": "7.1.1", "minio": "7.1.1",
"mysql": "2.18.1", "mysql": "2.18.1",
"node-fetch": "2.6.11", "node-fetch": "2.6.11",

View file

@ -11,10 +11,12 @@ import { Repository } from 'typeorm';
import { NotInDBError } from '../errors/errors'; import { NotInDBError } from '../errors/errors';
import { ConsoleLoggerService } from '../logger/console-logger.service'; import { ConsoleLoggerService } from '../logger/console-logger.service';
import { Note } from '../notes/note.entity'; import { Note } from '../notes/note.entity';
import { Tag } from '../notes/tag.entity';
import { EditService } from './edit.service'; import { EditService } from './edit.service';
import { RevisionMetadataDto } from './revision-metadata.dto'; import { RevisionMetadataDto } from './revision-metadata.dto';
import { RevisionDto } from './revision.dto'; import { RevisionDto } from './revision.dto';
import { Revision } from './revision.entity'; import { Revision } from './revision.entity';
import { extractRevisionMetadataFromContent } from './utils/extract-revision-metadata-from-content';
class RevisionUserInfo { class RevisionUserInfo {
usernames: string[]; usernames: string[];
@ -121,6 +123,9 @@ export class RevisionsService {
createdAt: revision.createdAt, createdAt: revision.createdAt,
authorUsernames: revisionUserInfo.usernames, authorUsernames: revisionUserInfo.usernames,
anonymousAuthorCount: revisionUserInfo.anonymousUserCount, anonymousAuthorCount: revisionUserInfo.anonymousUserCount,
title: revision.title,
description: revision.description,
tags: (await revision.tags).map((tag) => tag.name),
}; };
} }
@ -131,6 +136,9 @@ export class RevisionsService {
content: revision.content, content: revision.content,
length: revision.length, length: revision.length,
createdAt: revision.createdAt, createdAt: revision.createdAt,
title: revision.title,
tags: (await revision.tags).map((tag) => tag.name),
description: revision.description,
authorUsernames: revisionUserInfo.usernames, authorUsernames: revisionUserInfo.usernames,
anonymousAuthorCount: revisionUserInfo.anonymousUserCount, anonymousAuthorCount: revisionUserInfo.anonymousUserCount,
patch: revision.patch, patch: revision.patch,
@ -147,18 +155,35 @@ export class RevisionsService {
newContent: string, newContent: string,
yjsStateVector?: number[], yjsStateVector?: number[],
): Promise<Revision | undefined> { ): Promise<Revision | undefined> {
// TODO: Save metadata const latestRevision =
const latestRevision = await this.getLatestRevision(note); note.id === undefined ? undefined : await this.getLatestRevision(note);
const oldContent = latestRevision.content; const oldContent = latestRevision?.content;
if (oldContent === newContent) { if (oldContent === newContent) {
return undefined; return undefined;
} }
const patch = createPatch( const patch = createPatch(
note.publicId, note.publicId,
latestRevision.content, latestRevision?.content ?? '',
newContent, newContent,
); );
const revision = Revision.create(newContent, patch, note, yjsStateVector); const { title, description, tags } =
extractRevisionMetadataFromContent(newContent);
const tagEntities = tags.map((tagName) => {
const entity = new Tag();
entity.name = tagName;
return entity;
});
const revision = Revision.create(
newContent,
patch,
note,
yjsStateVector ?? null,
title,
description,
tagEntities,
) as Revision;
return await this.revisionRepository.save(revision); return await this.revisionRepository.save(revision);
} }
} }

View file

@ -0,0 +1,77 @@
/*
* SPDX-FileCopyrightText: 2023 The HedgeDoc developers (see AUTHORS file)
*
* SPDX-License-Identifier: AGPL-3.0-only
*/
import { extractRevisionMetadataFromContent } from './extract-revision-metadata-from-content';
// Suite for extractRevisionMetadataFromContent: each case feeds a raw note
// content string and checks the title, description and tags that come back.
describe('extractRevisionMetadataFromContent', () => {
  /**
   * Runs the extraction on the given content and compares every returned
   * field against the expected value.
   *
   * @param content the raw note content to extract metadata from
   * @param expectedTitle the title the extraction should produce
   * @param expectedDescription the description the extraction should produce
   * @param expectedTags the tags the extraction should produce, in order
   */
  const expectMetadata = (
    content: string,
    expectedTitle: string,
    expectedDescription: string,
    expectedTags: string[],
  ): void => {
    const { title, description, tags } =
      extractRevisionMetadataFromContent(content);
    expect(title).toBe(expectedTitle);
    expect(description).toBe(expectedDescription);
    expect(tags).toStrictEqual(expectedTags);
  };

  it('works without frontmatter without first heading', () => {
    expectMetadata('This is a note content', '', '', []);
  });

  it('works with broken frontmatter', () => {
    // The title is given as a YAML list instead of a string; every field is
    // expected to come back as its empty default.
    expectMetadata(
      '---\ntitle: \n - 1\n - 2\n---\nThis is a note content',
      '',
      '',
      [],
    );
  });

  it('works with frontmatter title', () => {
    expectMetadata(
      '---\ntitle: note title\n---\nThis is a note content',
      'note title',
      '',
      [],
    );
  });

  it('works with first heading title', () => {
    expectMetadata(
      '# Note Title Heading\nThis is a note content',
      'Note Title Heading',
      '',
      [],
    );
  });

  it('works with frontmatter description', () => {
    expectMetadata(
      '---\ndescription: note description\n---\nNote content',
      '',
      'note description',
      [],
    );
  });

  it('extracts tags as list', () => {
    expectMetadata(
      '---\ntags: \n - tag1\n - tag2\n---\nNote content',
      '',
      '',
      ['tag1', 'tag2'],
    );
  });

  it('extracts tags in legacy syntax', () => {
    // Legacy syntax: a single comma-separated string instead of a YAML list.
    expectMetadata('---\ntags: "tag1, tag2"\n---\nNote content', '', '', [
      'tag1',
      'tag2',
    ]);
  });
});

View file

@ -0,0 +1,87 @@
/*
* SPDX-FileCopyrightText: 2023 The HedgeDoc developers (see AUTHORS file)
*
* SPDX-License-Identifier: AGPL-3.0-only
*/
import {
convertRawFrontmatterToNoteFrontmatter,
defaultNoteFrontmatter,
extractFirstHeading,
extractFrontmatter,
generateNoteTitle,
NoteFrontmatter,
parseRawFrontmatterFromYaml,
} from '@hedgedoc/commons';
import { parseDocument } from 'htmlparser2';
import MarkdownIt from 'markdown-it';
/**
 * The metadata that {@link extractRevisionMetadataFromContent} returns for a
 * revision's content.
 */
interface FrontmatterExtractionResult {
// Title as produced by generateNoteTitle from the frontmatter and a
// first-heading callback.
title: string;
// Description taken from the frontmatter; empty string when there is none.
description: string;
// Tag names taken from the frontmatter; empty array when there are none.
tags: string[];
}
/**
 * Internal result of parsing the frontmatter block of a note's content.
 */
interface FrontmatterParserResult {
// The converted frontmatter, or defaultNoteFrontmatter when the raw YAML
// failed validation.
frontmatter: NoteFrontmatter;
// Index (into the content split by '\n') from which the content is kept when
// the frontmatter is cut off; computed as the extraction's lineOffset + 1.
firstLineOfContentIndex: number;
}
/**
 * Derives the metadata that is stored alongside a new revision (title,
 * description and tags) from the revision's content.
 *
 * Description and tags are read from the parsed frontmatter and default to an
 * empty string / empty array. The title is delegated to `generateNoteTitle`,
 * which receives the frontmatter and a callback that yields the first heading
 * of the content with the frontmatter lines removed.
 *
 * @param content the revision content, optionally starting with a frontmatter block
 * @returns the extracted title, description and tags
 */
export function extractRevisionMetadataFromContent(
  content: string,
): FrontmatterExtractionResult {
  const parsed = parseFrontmatter(content);
  const frontmatter = parsed?.frontmatter;
  const contentStartLine = parsed?.firstLineOfContentIndex;

  // Callback handed to generateNoteTitle; it renders the markdown body
  // (without frontmatter) and looks up its first heading.
  const firstHeadingProvider = (): string | undefined => {
    const body = generateContentWithoutFrontmatter(contentStartLine, content);
    return extractFirstHeadingFromContent(body);
  };

  return {
    title: generateNoteTitle(frontmatter, firstHeadingProvider),
    description: frontmatter?.description ?? '',
    tags: frontmatter?.tags ?? [],
  };
}
/**
 * Drops the leading lines of the content that precede the given line index.
 *
 * @param firstLineOfContentIndex index of the first line to keep, or
 *   `undefined` to keep the content untouched
 * @param content the full content, lines separated by `\n`
 * @returns the content starting at the given line, or the unchanged content
 *   if no index was given
 */
function generateContentWithoutFrontmatter(
  firstLineOfContentIndex: number | undefined,
  content: string,
): string {
  if (firstLineOfContentIndex === undefined) {
    return content;
  }
  const allLines = content.split('\n');
  const keptLines = allLines.slice(firstLineOfContentIndex);
  return keptLines.join('\n');
}
/**
 * Extracts and parses the frontmatter block of the given content.
 *
 * @param content the full note content
 * @returns the parsed frontmatter together with the line index at which the
 *   remaining content starts, or `undefined` if the extraction yields no
 *   (or only empty) raw frontmatter text
 */
function parseFrontmatter(
  content: string,
): FrontmatterParserResult | undefined {
  const extracted = extractFrontmatter(content.split('\n'));
  if (!extracted?.rawText) {
    return undefined;
  }

  const contentStartLine = extracted.lineOffset + 1;
  const validation = parseRawFrontmatterFromYaml(extracted.rawText);

  // Frontmatter that fails validation is replaced by the defaults, so the
  // frontmatter lines are still reported via the content start index.
  const frontmatter =
    validation.error === undefined
      ? convertRawFrontmatterToNoteFrontmatter(validation.value)
      : defaultNoteFrontmatter;

  return {
    frontmatter,
    firstLineOfContentIndex: contentStartLine,
  };
}
/**
 * Renders the given markdown to HTML, parses that HTML into a document and
 * hands it to `extractFirstHeading`.
 *
 * @param content the markdown content (without frontmatter)
 * @returns whatever `extractFirstHeading` finds in the rendered document
 */
function extractFirstHeadingFromContent(content: string): string | undefined {
  const renderedHtml = new MarkdownIt('default').render(content);
  return extractFirstHeading(parseDocument(renderedHtml));
}

View file

@ -2332,10 +2332,12 @@ __metadata:
eslint-plugin-prettier: 4.2.1 eslint-plugin-prettier: 4.2.1
express-session: 1.17.3 express-session: 1.17.3
file-type: 16.5.4 file-type: 16.5.4
htmlparser2: 9.0.0
http-proxy-middleware: 2.0.6 http-proxy-middleware: 2.0.6
jest: 29.5.0 jest: 29.5.0
joi: 17.9.2 joi: 17.9.2
ldapauth-fork: 5.0.5 ldapauth-fork: 5.0.5
markdown-it: 13.0.1
minio: 7.1.1 minio: 7.1.1
mocked-env: 1.3.5 mocked-env: 1.3.5
mysql: 2.18.1 mysql: 2.18.1