Skip to content

Commit d917261

Browse files
committed
Re-encode submission source in UTF-8
Try to re-encode the submission source to UTF-8 if it is not already valid UTF-8. As an extra safeguard, convert the result back from UTF-8 to the detected original encoding and compare it with the original bytes; this round-trip sanity check filters out binary blobs and verifies that the re-encoding is non-destructive.
1 parent 4018d41 commit d917261

File tree

3 files changed

+20
-3
lines changed

3 files changed

+20
-3
lines changed

webapp/src/Controller/Jury/SubmissionController.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -920,7 +920,7 @@ public function sourceAction(
920920
$files[$f->getFilename()][$submitId] = [
921921
'rank' => $f->getRank(),
922922
'filename' => $f->getFilename(),
923-
'source' => mb_check_encoding($f->getSourcecode(), 'UTF-8') ? $f->getSourcecode() : "Could not display file as UTF-8, is it binary?",
923+
'source' => Utils::reencodeUtf8($f->getSourcecode()),
924924
];
925925

926926
// Keep track of the single filename within a submission for handling renaming.

webapp/src/Twig/TwigExtension.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -901,7 +901,7 @@ public function codeEditor(
901901
HTML;
902902
$rank = $index;
903903
$id = sprintf('editor%s', $rank);
904-
$source = mb_check_encoding($code, 'UTF-8') ? $code : "Could not display file as UTF-8, is it binary?";
904+
$source = Utils::reencodeUtf8($code);
905905
if ($elementToUpdate) {
906906
$extraForEdit = <<<JS
907907
editor.getModel().onDidChangeContent(() => {
@@ -951,7 +951,7 @@ public function getMonacoModel(SubmissionFile $file): string
951951
}
952952
$this->renderedSources[$file->getSubmitfileid()] = true;
953953

954-
$source = mb_check_encoding($file->getSourcecode(), 'UTF-8') ? $file->getSourcecode() : "Could not display file as UTF-8, is it binary?";
954+
$source = Utils::reencodeUtf8($file->getSourcecode());
955955
return sprintf(
956956
<<<JS
957957
monaco.editor.createModel(

webapp/src/Utils/Utils.php

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,23 @@ public static function jsonEncode(mixed $data): string
10561056
return json_encode($data, JSON_PRESERVE_ZERO_FRACTION | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR);
10571057
}
10581058

1059+
/**
 * Return $source as UTF-8 text suitable for display, or a fixed notice when
 * it cannot be converted safely.
 *
 * Input that is already valid UTF-8 is returned untouched. Otherwise the
 * encoding is detected strictly among mb_detect_order() plus every encoding
 * mbstring lists, the input is converted to UTF-8, and the result is converted
 * back again; the converted text is only returned when that round trip
 * reproduces the input byte-for-byte.
 *
 * @param string $source Raw file contents in an unknown encoding (possibly binary).
 * @return string UTF-8 text, or the "Could not display file as UTF-8" notice.
 */
public static function reencodeUtf8(string $source): string {
    // Fast path: valid UTF-8 needs no detection. This avoids scanning every
    // supported encoding for the common case and rules out the detector
    // preferring another candidate encoding for text that is already UTF-8.
    if (mb_check_encoding($source, 'UTF-8')) {
        return $source;
    }

    // Try the configured detection order first, then everything mbstring supports.
    $detectOrder = array_unique(array_merge(mb_detect_order(), mb_list_encodings()));
    $encoding = mb_detect_encoding($source, $detectOrder, strict: true);
    if ($encoding !== false) {
        $encoded = $encoding !== 'UTF-8' ? mb_convert_encoding($source, 'UTF-8', $encoding) : $source;
        // Some binary files are strictly valid in an encoding but fail to re-encode correctly.
        // A successive strict call to mb_detect_encoding still says it is validly encoded,
        // but rendering the string fails. Converting back to the detected encoding and
        // comparing with the original bytes filters these files.
        if (is_string($encoded) && mb_convert_encoding($encoded, $encoding, 'UTF-8') === $source) {
            return $encoded;
        }
    }

    return "Could not display file as UTF-8.\n"
        . "Check the supported PHP mbstring encodings on the `Config checker` page if you did not expect this file to be binary.";
}
1075+
10591076
/**
10601077
* @return array<string, string>
10611078
*/

0 commit comments

Comments
 (0)