-
Notifications
You must be signed in to change notification settings - Fork 15
Add initial AI feedback module for PDF thesis artifacts #1149
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 3 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
c56e623
Add initial AI feedback module for PDF thesis artifacts
bensofficial d1efc2c
Merge branch 'develop' of github.com:ls1intum/thesis-management into …
bensofficial 9c4f594
Fix checkstyle
bensofficial 129b849
Update Server sbom
bensofficial f35fc10
Fix tests and normalize string assertions
bensofficial a595e7d
Resolve comments
bensofficial ddc2e71
Implement reviewer feedback
bensofficial c8cb112
Reject empty PDFs
bensofficial 173b8ee
Add LLM guardrails for uploaded pdf pages
bensofficial 97ade87
Merge remote-tracking branch 'origin/develop' into feat/add-initial-f…
bensofficial File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
server/src/main/java/de/tum/cit/aet/thesis/feedback/config/AIFeaturesEnabled.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| package de.tum.cit.aet.thesis.feedback.config; | ||
|
|
||
| import org.springframework.context.annotation.Condition; | ||
| import org.springframework.context.annotation.ConditionContext; | ||
| import org.springframework.core.type.AnnotatedTypeMetadata; | ||
|
|
||
| public class AIFeaturesEnabled implements Condition { | ||
| public static final String PROPERTY = "thesis-management.ai.enabled"; | ||
|
|
||
| @Override | ||
| public boolean matches(ConditionContext context, AnnotatedTypeMetadata metadata) { | ||
| return Boolean.TRUE.equals( | ||
| context.getEnvironment().getProperty(PROPERTY, Boolean.class, false)); | ||
| } | ||
| } |
55 changes: 55 additions & 0 deletions
55
server/src/main/java/de/tum/cit/aet/thesis/feedback/controller/ReviewController.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| package de.tum.cit.aet.thesis.feedback.controller; | ||
|
|
||
| import de.tum.cit.aet.thesis.feedback.config.AIFeaturesEnabled; | ||
| import de.tum.cit.aet.thesis.feedback.dto.ProviderCategory; | ||
| import de.tum.cit.aet.thesis.feedback.dto.ReviewRequestDTO; | ||
| import de.tum.cit.aet.thesis.feedback.dto.ReviewResultDTO; | ||
| import de.tum.cit.aet.thesis.feedback.service.ReviewService; | ||
| import lombok.extern.slf4j.Slf4j; | ||
| import org.springframework.context.annotation.Conditional; | ||
| import org.springframework.http.HttpStatus; | ||
| import org.springframework.http.MediaType; | ||
| import org.springframework.http.ResponseEntity; | ||
| import org.springframework.security.access.prepost.PreAuthorize; | ||
| import org.springframework.web.bind.annotation.ModelAttribute; | ||
| import org.springframework.web.bind.annotation.PostMapping; | ||
| import org.springframework.web.bind.annotation.RequestMapping; | ||
| import org.springframework.web.bind.annotation.RestController; | ||
| import org.springframework.web.server.ResponseStatusException; | ||
|
|
||
| /** REST controller for AI generated feedback. */ | ||
| @Slf4j | ||
| @RestController | ||
| @RequestMapping("/v2/ai-review") | ||
| @Conditional(AIFeaturesEnabled.class) | ||
| public class ReviewController { | ||
| private final ReviewService reviewService; | ||
|
|
||
| /** | ||
| * Creates the controller with its review service collaborator. | ||
| * | ||
| * @param reviewService service that runs the AI review pipeline | ||
| */ | ||
| public ReviewController(ReviewService reviewService) { | ||
| this.reviewService = reviewService; | ||
| } | ||
|
|
||
| /** | ||
| * Runs the AI review pipeline against an uploaded proposal PDF. | ||
| * | ||
| * @param request multipart payload containing the proposal file and the provider category | ||
| * @return the merged review result produced by the LLM pipeline | ||
| */ | ||
| @PostMapping(value = "review-proposal", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) | ||
| @PreAuthorize("hasAnyRole('admin', 'advisor', 'supervisor')") | ||
| public ResponseEntity<ReviewResultDTO> reviewProposal(@ModelAttribute ReviewRequestDTO request) { | ||
| // TODO: Use already uploaded file from the thesis service instead of uploading it again | ||
|
|
||
| if (request.providerCategory().equals(ProviderCategory.AZURE)) { | ||
|
bensofficial marked this conversation as resolved.
|
||
| throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Azure provider is not supported yet."); | ||
| } | ||
|
|
||
| ReviewResultDTO reviewResult = reviewService.review(request); | ||
| return ResponseEntity.ok().body(reviewResult); | ||
| } | ||
| } | ||
12 changes: 12 additions & 0 deletions
12
server/src/main/java/de/tum/cit/aet/thesis/feedback/dto/AssessmentCategory.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| package de.tum.cit.aet.thesis.feedback.dto; | ||
|
|
||
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
|
|
||
| public enum AssessmentCategory { | ||
| @JsonProperty("good") | ||
| GOOD, | ||
| @JsonProperty("acceptable") | ||
| ACCEPTABLE, | ||
| @JsonProperty("needs-work") | ||
| NEEDS_WORK | ||
| } |
6 changes: 6 additions & 0 deletions
6
server/src/main/java/de/tum/cit/aet/thesis/feedback/dto/FindingDTO.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| package de.tum.cit.aet.thesis.feedback.dto; | ||
|
|
||
| import java.util.List; | ||
|
|
||
| public record FindingDTO(String severity, String category, String title, String description, List<Location> locations) { | ||
| } |
6 changes: 6 additions & 0 deletions
6
server/src/main/java/de/tum/cit/aet/thesis/feedback/dto/IntermediateReviewResult.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| package de.tum.cit.aet.thesis.feedback.dto; | ||
|
|
||
| import java.util.List; | ||
|
|
||
| public record IntermediateReviewResult(List<FindingDTO> findings) { | ||
| } |
4 changes: 4 additions & 0 deletions
4
server/src/main/java/de/tum/cit/aet/thesis/feedback/dto/Location.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| package de.tum.cit.aet.thesis.feedback.dto; | ||
|
|
||
/**
 * Position inside a document that a finding points at.
 *
 * @param page page number; nullable (NOTE(review): presumably 1-based, confirm against the producer)
 * @param section name of the section the location belongs to
 * @param quote quoted text at the location
 */
public record Location(
        Integer page,
        String section,
        String quote
) {
}
|
bensofficial marked this conversation as resolved.
|
||
5 changes: 5 additions & 0 deletions
5
server/src/main/java/de/tum/cit/aet/thesis/feedback/dto/ProviderCategory.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| package de.tum.cit.aet.thesis.feedback.dto; | ||
|
|
||
/** Category of LLM provider that a review request can select. */
public enum ProviderCategory {
    /** Azure provider category. */
    AZURE,

    /** Local provider category (NOTE(review): presumably a self-hosted model; confirm). */
    LOCAL
}
6 changes: 6 additions & 0 deletions
6
server/src/main/java/de/tum/cit/aet/thesis/feedback/dto/ReviewRequestDTO.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| package de.tum.cit.aet.thesis.feedback.dto; | ||
|
|
||
| import org.springframework.web.multipart.MultipartFile; | ||
|
|
||
| public record ReviewRequestDTO(ProviderCategory providerCategory, MultipartFile file) { | ||
|
bensofficial marked this conversation as resolved.
Outdated
|
||
| } | ||
6 changes: 6 additions & 0 deletions
6
server/src/main/java/de/tum/cit/aet/thesis/feedback/dto/ReviewResultDTO.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| package de.tum.cit.aet.thesis.feedback.dto; | ||
|
|
||
| import java.util.List; | ||
|
|
||
| public record ReviewResultDTO(AssessmentCategory category, String summary, List<FindingDTO> findings) { | ||
| } |
89 changes: 89 additions & 0 deletions
89
server/src/main/java/de/tum/cit/aet/thesis/feedback/service/PdfService.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| package de.tum.cit.aet.thesis.feedback.service; | ||
|
|
||
| import de.tum.cit.aet.thesis.feedback.config.AIFeaturesEnabled; | ||
| import org.apache.pdfbox.Loader; | ||
| import org.apache.pdfbox.pdmodel.PDDocument; | ||
| import org.apache.pdfbox.rendering.PDFRenderer; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
| import org.springframework.ai.content.Media; | ||
| import org.springframework.ai.document.Document; | ||
| import org.springframework.ai.reader.pdf.PagePdfDocumentReader; | ||
| import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; | ||
| import org.springframework.context.annotation.Conditional; | ||
| import org.springframework.core.io.ByteArrayResource; | ||
| import org.springframework.stereotype.Service; | ||
| import org.springframework.util.MimeTypeUtils; | ||
| import org.springframework.web.multipart.MultipartFile; | ||
|
|
||
| import javax.imageio.ImageIO; | ||
|
|
||
| import java.io.ByteArrayOutputStream; | ||
| import java.io.IOException; | ||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
|
|
||
| /** | ||
| * Extracts per-page text and renders per-page PNG images from an uploaded PDF so the LLM | ||
| * pipeline can reason about both modalities. | ||
| */ | ||
| @Service | ||
| @Conditional(AIFeaturesEnabled.class) | ||
| public class PdfService { | ||
| private static final Logger log = LoggerFactory.getLogger(PdfService.class); | ||
|
|
||
| /** | ||
| * Extracts the text content of each page of the uploaded PDF. | ||
| * | ||
| * @param file uploaded PDF file | ||
| * @return one string per page in document order | ||
| */ | ||
| public List<String> extractTextFromPdf(MultipartFile file) { | ||
| log.debug("Extracting text from PDF file: {}", file.getOriginalFilename()); | ||
| ByteArrayResource resource; | ||
| try { | ||
| resource = new ByteArrayResource(file.getBytes()); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException("Failed to extract text of file", e); | ||
| } | ||
|
|
||
| PdfDocumentReaderConfig config = PdfDocumentReaderConfig.builder().withPagesPerDocument(1).build(); | ||
| PagePdfDocumentReader reader = new PagePdfDocumentReader(resource, config); | ||
|
|
||
| List<Document> docs = reader.read(); | ||
| return docs.stream().map(Document::getText).toList(); | ||
|
bensofficial marked this conversation as resolved.
|
||
| } | ||
|
|
||
| /** | ||
| * Renders each page of the uploaded PDF to a PNG image at 300 DPI. | ||
| * | ||
| * @param file uploaded PDF file | ||
| * @return one PNG-encoded {@link Media} per page in document order | ||
| */ | ||
| public List<Media> extractImagesFromPdf(MultipartFile file) { | ||
| log.debug("Extracting images from PDF file: {}", file.getOriginalFilename()); | ||
| List<Media> images = new ArrayList<>(); | ||
|
|
||
| try (PDDocument document = Loader.loadPDF(file.getBytes())) { | ||
| PDFRenderer renderer = new PDFRenderer(document); | ||
|
|
||
| for (int page = 0; page < document.getNumberOfPages(); page++) { | ||
|
bensofficial marked this conversation as resolved.
|
||
| var image = renderer.renderImageWithDPI(page, 300); | ||
|
|
||
| ByteArrayOutputStream stream = new ByteArrayOutputStream(); | ||
| ImageIO.write(image, "png", stream); | ||
| byte[] imageBytes = stream.toByteArray(); | ||
|
|
||
| ByteArrayResource resource = new ByteArrayResource(imageBytes); | ||
|
|
||
| Media media = Media.builder().mimeType(MimeTypeUtils.IMAGE_PNG).data(resource).name("page_" + (page + 1) + ".png").build(); | ||
|
|
||
| images.add(media); | ||
| } | ||
| } catch (IOException e) { | ||
| throw new RuntimeException("Failed to extract images of file", e); | ||
| } | ||
|
|
||
| return images; | ||
| } | ||
| } | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.