Use more accurate language detection for syntax highlighting (#1891)

Update spotter to version 4 in order to get the Prism syntax mode for detected programming languages.
Expose the syntax modes of programming languages as headers on the content endpoint and as fields on the diff DTO.
Remove the leading line break from search result fragments.
Use the mark element instead of span or strong elements for highlighted search results.
Add option to use syntax highlighting in TextHitField component.

Co-authored-by: Matthias Thieroff <matthias.thieroff@cloudogu.com>
This commit is contained in:
Sebastian Sdorra
2021-12-13 17:03:08 +01:00
committed by GitHub
parent 6eba01161f
commit e2d63cc2a1
34 changed files with 809 additions and 802 deletions

View File

@@ -141,7 +141,7 @@ public class ContentResourceTest {
Response response = contentResource.get(NAMESPACE, REPO_NAME, REV, "SomeGoCode.go", null, null);
assertEquals(200, response.getStatus());
assertEquals("golang", response.getHeaderString("X-Programming-Language"));
assertEquals("Go", response.getHeaderString("X-Programming-Language"));
assertEquals("text/x-go", response.getHeaderString("Content-Type"));
}
@@ -152,10 +152,22 @@ public class ContentResourceTest {
Response response = contentResource.get(NAMESPACE, REPO_NAME, REV, "Dockerfile", null, null);
assertEquals(200, response.getStatus());
assertEquals("dockerfile", response.getHeaderString("X-Programming-Language"));
assertEquals("Dockerfile", response.getHeaderString("X-Programming-Language"));
assertEquals("text/plain", response.getHeaderString("Content-Type"));
}
/**
 * Verifies that fetching the Go sample file "SomeGoCode.go" through the content resource
 * answers with status 200 and carries one syntax-mode header per highlighter:
 * X-Syntax-Mode-Ace, X-Syntax-Mode-Codemirror and X-Syntax-Mode-Prism.
 */
@Test
public void shouldRecognizeSyntaxModes() throws Exception {
mockContentFromResource("SomeGoCode.go");
Response response = contentResource.get(NAMESPACE, REPO_NAME, REV, "SomeGoCode.go", null, null);
assertEquals(200, response.getStatus());
// The expected mode name differs per highlighter: "golang" for Ace, "go" for Codemirror and Prism.
assertEquals("golang", response.getHeaderString("X-Syntax-Mode-Ace"));
assertEquals("go", response.getHeaderString("X-Syntax-Mode-Codemirror"));
assertEquals("go", response.getHeaderString("X-Syntax-Mode-Prism"));
}
@Test
public void shouldHandleRandomByteFile() throws Exception {
mockContentFromResource("JustBytes");
@@ -190,6 +202,7 @@ public class ContentResourceTest {
assertEquals("application/octet-stream", response.getHeaderString("Content-Type"));
}
@SuppressWarnings("UnstableApiUsage")
private void mockContentFromResource(String fileName) throws Exception {
URL url = Resources.getResource(fileName);
mockContent(fileName, Resources.toByteArray(url));

View File

@@ -60,11 +60,16 @@ class DiffResultToDiffResultDtoMapperTest {
DiffResultDto dto = mapper.mapForRevision(REPOSITORY, createResult(), "123");
List<DiffResultDto.FileDto> files = dto.getFiles();
assertAddedFile(files.get(0), "A.java", "abc", "java");
assertModifiedFile(files.get(1), "B.ts", "abc", "def", "typescript");
assertDeletedFile(files.get(2), "C.go", "ghi", "golang");
assertRenamedFile(files.get(3), "typo.ts", "okay.ts", "def", "fixed", "typescript");
assertCopiedFile(files.get(4), "good.ts", "better.ts", "def", "fixed", "typescript");
assertAddedFile(files.get(0), "A.java", "abc", "Java");
assertModifiedFile(files.get(1), "B.ts", "abc", "def", "TypeScript");
DiffResultDto.FileDto cGo = files.get(2);
assertDeletedFile(cGo, "C.go", "ghi", "Go");
assertThat(cGo.getSyntaxModes())
.containsEntry("ace", "golang")
.containsEntry("codemirror", "go")
.containsEntry("prism", "go");
assertRenamedFile(files.get(3), "typo.ts", "okay.ts", "def", "fixed", "TypeScript");
assertCopiedFile(files.get(4), "good.ts", "better.ts", "def", "fixed", "TypeScript");
DiffResultDto.HunkDto hunk = files.get(1).getHunks().get(0);
assertHunk(hunk, "@@ -3,4 1,2 @@", 1, 2, 3, 4);

View File

@@ -24,15 +24,15 @@
package sonia.scm.io;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import static org.assertj.core.api.Assertions.assertThat;
class DefaultContentTypeResolverTest {
@@ -84,42 +84,45 @@ class DefaultContentTypeResolverTest {
"% Which does not start with markdown"
);
ContentType contentType = contentTypeResolver.resolve("somedoc.md", content.getBytes(StandardCharsets.UTF_8));
Assertions.assertThat(contentType.getLanguage()).contains("markdown");
assertThat(contentType.getLanguage()).contains("Markdown");
}
@Test
void shouldResolveMarkdownWithoutContent() {
ContentType contentType = contentTypeResolver.resolve("somedoc.md");
Assertions.assertThat(contentType.getLanguage()).contains("markdown");
assertThat(contentType.getLanguage()).contains("Markdown");
}
@Test
void shouldResolveMarkdownEvenWithDotsInFilename() {
ContentType contentType = contentTypeResolver.resolve("somedoc.1.1.md");
Assertions.assertThat(contentType.getLanguage()).contains("markdown");
assertThat(contentType.getLanguage()).contains("Markdown");
}
@Test
void shouldResolveDockerfile() {
ContentType contentType = contentTypeResolver.resolve("Dockerfile");
Assertions.assertThat(contentType.getLanguage()).contains("dockerfile");
assertThat(contentType.getLanguage()).contains("Dockerfile");
}
}
@Nested
class GetSyntaxModesTests {
@Test
void shouldReturnAceModeIfPresent() {
assertThat(contentTypeResolver.resolve("app.go").getLanguage()).contains("golang"); // codemirror is just go
assertThat(contentTypeResolver.resolve("App.java").getLanguage()).contains("java"); // codemirror is clike
void shouldReturnEmptyMapOfModesWithoutLanguage() {
Map<String, String> syntaxModes = contentTypeResolver.resolve("app.exe").getSyntaxModes();
assertThat(syntaxModes).isEmpty();
}
@Test
void shouldReturnCodemirrorIfAceModeIsMissing() {
assertThat(contentTypeResolver.resolve("index.ecr").getLanguage()).contains("htmlmixed");
}
@Test
void shouldReturnTextIfNoModeIsPresent() {
assertThat(contentTypeResolver.resolve("index.hxml").getLanguage()).contains("text");
void shouldReturnMapOfModes() {
Map<String, String> syntaxModes = contentTypeResolver.resolve("app.rs").getSyntaxModes();
assertThat(syntaxModes)
.containsEntry("ace", "rust")
.containsEntry("codemirror", "rust")
.containsEntry("prism", "rust");
}
}

View File

@@ -47,8 +47,6 @@ import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class LuceneHighlighterTest {
@Test
void shouldHighlightText() throws InvalidTokenOffsetsException, IOException {
StandardAnalyzer analyzer = new StandardAnalyzer();
@@ -80,6 +78,15 @@ class LuceneHighlighterTest {
);
}
/**
 * Verifies that highlighting the term "die" in "GameOfLife.java" produces exactly one
 * snippet and that this snippet does not begin with a newline character.
 */
@Test
void shouldNotStartHighlightedFragmentWithLineBreak() throws IOException, InvalidTokenOffsetsException {
String[] snippets = highlightCode("GameOfLife.java", "die");
// hasSize(1) pins the fragment count; allSatisfy then checks the leading character of each snippet.
assertThat(snippets).hasSize(1).allSatisfy(
snippet -> assertThat(snippet).doesNotStartWith("\n")
);
}
@Test
void shouldHighlightCodeInTsx() throws IOException, InvalidTokenOffsetsException {
String[] snippets = highlightCode("Button.tsx", "inherit");