Fix language detection in code mirror code blocks

Signed-off-by: Tilman Vatteroth <git@tilmanvatteroth.de>
This commit is contained in:
Tilman Vatteroth 2022-05-22 18:23:34 +02:00
parent ddea0edbf8
commit f328681c0f
6 changed files with 146 additions and 13 deletions

View file

@ -25,12 +25,13 @@ import { useCodeMirrorScrollWatchExtension } from './hooks/code-mirror-extension
import { useCodeMirrorPasteExtension } from './hooks/code-mirror-extensions/use-code-mirror-paste-extension'
import { useCodeMirrorFileDropExtension } from './hooks/code-mirror-extensions/use-code-mirror-file-drop-extension'
import { markdown, markdownLanguage } from '@codemirror/lang-markdown'
import { languages } from '@codemirror/language-data'
import { EditorView } from '@codemirror/view'
import { autocompletion } from '@codemirror/autocomplete'
import { useCodeMirrorFocusReference } from './hooks/use-code-mirror-focus-reference'
import { useOffScreenScrollProtection } from './hooks/use-off-screen-scroll-protection'
import { cypressId } from '../../../utils/cypress-attribute'
import { findLanguageByCodeBlockName } from '../../markdown-renderer/markdown-extension/code-block-markdown-extension/find-language-by-code-block-name'
import { languages } from '@codemirror/language-data'
const logger = new Logger('EditorPane')
@ -62,7 +63,10 @@ export const EditorPane: React.FC<ScrollProps> = ({ scrollState, onScroll, onMak
const extensions = useMemo(
() => [
markdown({ base: markdownLanguage, codeLanguages: languages }),
markdown({
base: markdownLanguage,
codeLanguages: (input) => findLanguageByCodeBlockName(languages, input)
}),
...saveOffFocusScrollStateExtensions,
focusExtension,
EditorView.lineWrapping,

View file

@ -6,9 +6,10 @@
import type MarkdownIt from 'markdown-it'
import type { RuleCore } from 'markdown-it/lib/parser_core'
import Optional from 'optional-js'
import { parseCodeBlockParameters } from './code-block-parameters'
const ruleName = 'code-highlighter'
const codeFenceArguments = /^ *([\w-]*)(.*)$/
/**
* Extracts the language name and additional flags from the code fence parameter and sets them as attributes in the token.
@ -19,16 +20,13 @@ const codeFenceArguments = /^ *([\w-]*)(.*)$/
const rule: RuleCore = (state) => {
state.tokens.forEach((token) => {
if (token.type === 'fence') {
const highlightInfos = codeFenceArguments.exec(token.info)
if (!highlightInfos) {
return
}
if (highlightInfos[1]) {
token.attrJoin('data-highlight-language', highlightInfos[1])
}
if (highlightInfos[2]) {
token.attrJoin('data-extra', highlightInfos[2])
}
const highlightInfos = parseCodeBlockParameters(token.info)
Optional.ofNullable(highlightInfos.language).ifPresent((language) =>
token.attrJoin('data-highlight-language', language)
)
Optional.ofNullable(highlightInfos.codeFenceParameters).ifPresent((language) =>
token.attrJoin('data-extra', language)
)
}
})
return true

View file

@ -0,0 +1,40 @@
/*
* SPDX-FileCopyrightText: 2022 The HedgeDoc developers (see AUTHORS file)
*
* SPDX-License-Identifier: AGPL-3.0-only
*/
import { parseCodeBlockParameters } from './code-block-parameters'
describe('Code block parameter parsing', () => {
  /**
   * Parses the given code block name and checks both parts of the result.
   *
   * @param input The raw code block name that should be parsed
   * @param expectedLanguage The language name that the parser should extract
   * @param expectedParameters The additional parameters that the parser should extract
   */
  const expectParsed = (input: string, expectedLanguage: string, expectedParameters: string): void => {
    const { language, codeFenceParameters } = parseCodeBlockParameters(input)
    expect(language).toBe(expectedLanguage)
    expect(codeFenceParameters).toBe(expectedParameters)
  }

  it('should detect just the language', () => {
    expectParsed('esperanto', 'esperanto', '')
  })

  it('should detect an empty string', () => {
    expectParsed('', '', '')
  })

  it('should detect additional information after the language', () => {
    expectParsed('esperanto!!!!!', 'esperanto', '!!!!!')
  })

  it('should detect just the additional information if no language is given', () => {
    // The input doesn't start with a word character, so everything counts as parameters.
    expectParsed('!!!!!esperanto', '', '!!!!!esperanto')
  })

  it('should detect additional information if separated from the language with a space', () => {
    expectParsed('esperanto sed multe', 'esperanto', 'sed multe')
  })

  it('should ignore spaces at the beginning and the end', () => {
    expectParsed(' esperanto sed multe ', 'esperanto', 'sed multe')
  })
})

View file

@ -0,0 +1,26 @@
/*
* SPDX-FileCopyrightText: 2022 The HedgeDoc developers (see AUTHORS file)
*
* SPDX-License-Identifier: AGPL-3.0-only
*/
/**
 * Splits a code block name into its parts:
 * leading spaces and tabs are skipped, the first group captures the language name (word characters and dashes)
 * and the second group captures everything that follows on the same line.
 */
const codeFenceArguments = /^[ \t]*([\w-]*)(.*)$/

interface CodeBlockParameters {
  /** The language name or an empty string if the input doesn't start with one. */
  language: string
  /** Everything that follows the language name, without surrounding whitespace. */
  codeFenceParameters: string
}

/**
 * Parses the language name and additional parameters from a code block name input.
 *
 * Leading spaces and tabs are ignored. The language name consists of the leading word characters and dashes.
 * Everything after it is returned as additional parameters with surrounding whitespace removed.
 * If the input can't be parsed (e.g. because it contains a line break) both values are empty strings.
 *
 * @param text The text to parse
 * @return The parsed parameters
 */
export const parseCodeBlockParameters = (text: string): CodeBlockParameters => {
  const parsedText = codeFenceArguments.exec(text)
  return {
    // The optional chaining on the groups keeps the access safe under "noUncheckedIndexedAccess".
    language: parsedText?.[1]?.trim() ?? '',
    codeFenceParameters: parsedText?.[2]?.trim() ?? ''
  }
}

View file

@ -0,0 +1,37 @@
/*
* SPDX-FileCopyrightText: 2022 The HedgeDoc developers (see AUTHORS file)
*
* SPDX-License-Identifier: AGPL-3.0-only
*/
import { findLanguageByCodeBlockName } from './find-language-by-code-block-name'
import { Mock } from 'ts-mockery'
import type { LanguageDescription } from '@codemirror/language'
describe('filter language name', () => {
  // Four mocked language descriptions whose only alias is the lower case variant of their name.
  const mocky = Mock.of<LanguageDescription>({ name: 'Mocky', alias: ['mocky'] })
  const blocky = Mock.of<LanguageDescription>({ name: 'Blocky', alias: ['blocky'] })
  const rocky = Mock.of<LanguageDescription>({ name: 'Rocky', alias: ['rocky'] })
  const zocky = Mock.of<LanguageDescription>({ name: 'Zocky', alias: ['zocky'] })
  const availableLanguages = [mocky, blocky, rocky, zocky]

  /**
   * Looks up the given code block name in the mocked languages.
   *
   * @param codeBlockName The code block name to search for
   * @return The result of the lookup
   */
  const lookup = (codeBlockName: string): LanguageDescription | null =>
    findLanguageByCodeBlockName(availableLanguages, codeBlockName)

  it('should detect just the name of a language', () => {
    expect(lookup('Mocky')).toBe(mocky)
  })

  it('should detect the name of a language with parameters', () => {
    expect(lookup('Blocky!!!')).toBe(blocky)
  })

  it('should detect just the alias of a language', () => {
    expect(lookup('rocky')).toBe(rocky)
  })

  it('should detect the alias of a language with parameters', () => {
    expect(lookup('zocky!!!')).toBe(zocky)
  })

  it("shouldn't return a language if no match", () => {
    expect(lookup('Docky')).toBe(null)
  })
})

View file

@ -0,0 +1,28 @@
/*
* SPDX-FileCopyrightText: 2022 The HedgeDoc developers (see AUTHORS file)
*
* SPDX-License-Identifier: AGPL-3.0-only
*/
import Optional from 'optional-js'
import type { LanguageDescription } from '@codemirror/language'
import { parseCodeBlockParameters } from './code-block-parameters'
/**
 * Finds the {@link LanguageDescription CodeMirror language description} whose name or one of whose aliases
 * matches the language given in a code block name.
 * Additional code block name parameters after the language name are ignored.
 * Names and aliases are compared case-insensitively, so "JAVASCRIPT" finds the same description as "javascript".
 *
 * @param languages The languages in which the description should be found
 * @param inputLanguageName The input from the code block (language name, optionally followed by parameters)
 * @return The first matching language description or null if no language could be found by name or alias
 */
export const findLanguageByCodeBlockName = (
  languages: LanguageDescription[],
  inputLanguageName: string
): LanguageDescription | null => {
  return Optional.ofNullable(parseCodeBlockParameters(inputLanguageName).language)
    .map<LanguageDescription | null>((parsedLanguageName) => {
      // Normalize once so every candidate is compared against the same lower case value.
      const normalizedName = parsedLanguageName.toLowerCase()
      return languages.find(
        (language) =>
          language.name.toLowerCase() === normalizedName ||
          language.alias.some((alias) => alias.toLowerCase() === normalizedName)
      )
    })
    .orElse(null)
}