Initial commit: SharePoint connector and ToothFairyAI integration
Some checks failed
CI - SharePoint Plugin with SonarQube / Test and SonarQube Analysis (push) Has been cancelled
This commit is contained in:
145
test_document_parser.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""
|
||||
Unit tests for document_parser.py
|
||||
"""
|
||||
import pytest
|
||||
from document_parser import DocumentParser, get_file_info
|
||||
|
||||
|
||||
class TestDocumentParser:
    """Tests for DocumentParser: extension checks, text decoding, JSON and CSV."""

    def setup_method(self):
        """Create a fresh DocumentParser before each test method."""
        self.parser = DocumentParser()

    def test_can_parse_supported_extensions(self):
        """can_parse is truthy for every filename in the supported set."""
        accepted = (
            'document.txt', 'readme.md', 'data.csv', 'config.json',
            'report.pdf', 'document.docx', 'spreadsheet.xlsx', 'slides.pptx',
            'script.py', 'code.java', 'style.css', 'index.html',
        )

        for filename in accepted:
            assert self.parser.can_parse(filename), f"Should parse {filename}"

    def test_can_parse_unsupported_extensions(self):
        """can_parse is falsy for media, archive, executable and .doc names."""
        rejected = (
            'image.png', 'video.mp4', 'audio.mp3', 'archive.zip',
            'binary.exe', 'document.doc',
        )

        for filename in rejected:
            assert not self.parser.can_parse(filename), f"Should not parse {filename}"

    def test_get_extension(self):
        """_get_extension lower-cases, keeps only the last suffix, and may be empty."""
        # (input filename, expected extension) pairs, checked in order.
        cases = (
            ('file.txt', '.txt'),
            ('FILE.TXT', '.txt'),
            ('archive.tar.gz', '.gz'),
            ('noextension', ''),
        )

        for given, expected in cases:
            assert self.parser._get_extension(given) == expected

    def test_parse_text_utf8(self):
        """parse returns the original text for a UTF-8 encoded .txt payload."""
        expected = "Hello World\nThis is a test"
        parsed = self.parser.parse(expected.encode('utf-8'), 'test.txt')
        assert parsed == expected

    def test_parse_text_multiple_encodings(self):
        """_parse_text decodes the same text when encoded as UTF-8 and as Latin-1."""
        text = "Test content"

        # NOTE(review): the payload is pure ASCII, so both codecs produce
        # byte-identical input here; a non-ASCII sample would be needed to
        # distinguish the two decoding paths.
        for codec in ('utf-8', 'latin-1'):
            decoded = self.parser._parse_text(text.encode(codec))
            assert decoded == "Test content"

    def test_parse_unsupported_file_raises_error(self):
        """parse raises ValueError matching "Unsupported file type" for a .exe name."""
        payload, filename = b"content", "file.exe"
        with pytest.raises(ValueError, match="Unsupported file type"):
            self.parser.parse(payload, filename)

    def test_parse_json(self):
        """parse of a .json payload yields text containing both key/value pairs."""
        raw = b'{"key": "value", "number": 123}'
        parsed = self.parser.parse(raw, 'data.json')

        for fragment in ('"key": "value"', '"number": 123'):
            assert fragment in parsed

    def test_parse_csv(self):
        """parse of a .csv payload yields text containing the first data row's cells."""
        raw = b"name,age,city\nAlice,30,NYC\nBob,25,LA"
        parsed = self.parser.parse(raw, 'data.csv')

        for cell in ("Alice", "30", "NYC"):
            assert cell in parsed
|
||||
|
||||
|
||||
class TestGetFileInfo:
    """Tests for the mapping returned by get_file_info(filename, size)."""

    def test_document_category(self):
        """A .pdf filename is categorised as 'document' with extension '.pdf'."""
        pdf_info = get_file_info('report.pdf', 1024)
        assert pdf_info['category'] == 'document'
        assert pdf_info['extension'] == '.pdf'

    def test_spreadsheet_category(self):
        """A .xlsx filename is categorised as 'spreadsheet' with extension '.xlsx'."""
        sheet_info = get_file_info('data.xlsx', 2048)
        assert sheet_info['category'] == 'spreadsheet'
        assert sheet_info['extension'] == '.xlsx'

    def test_presentation_category(self):
        """A .pptx filename is categorised as 'presentation'."""
        deck_info = get_file_info('slides.pptx', 4096)
        assert deck_info['category'] == 'presentation'

    def test_code_category(self):
        """A .py filename is categorised as 'code'."""
        source_info = get_file_info('script.py', 512)
        assert source_info['category'] == 'code'

    def test_image_category(self):
        """A .jpg filename is categorised as 'image'."""
        photo_info = get_file_info('photo.jpg', 8192)
        assert photo_info['category'] == 'image'

    def test_file_size_bytes(self):
        """A 512-byte size is formatted as '512 B' and echoed in size_bytes."""
        size = 512
        details = get_file_info('small.txt', size)
        assert details['size_formatted'] == '512 B'
        assert details['size_bytes'] == size

    def test_file_size_kilobytes(self):
        """A 2048-byte size is formatted with a KB unit and echoed in size_bytes."""
        size = 2048
        details = get_file_info('medium.txt', size)
        assert 'KB' in details['size_formatted']
        assert details['size_bytes'] == size

    def test_file_size_megabytes(self):
        """A 5 MiB size is formatted with an MB unit and the value '5.0'."""
        details = get_file_info('large.pdf', 5 * 1024 ** 2)
        formatted = details['size_formatted']
        assert 'MB' in formatted
        assert '5.0' in formatted

    def test_file_size_gigabytes(self):
        """A 2 GiB size is formatted with a GB unit and the value '2.0'."""
        details = get_file_info('huge.zip', 2 * 1024 ** 3)
        formatted = details['size_formatted']
        assert 'GB' in formatted
        assert '2.0' in formatted

    def test_unknown_extension(self):
        """An unrecognised extension gets category 'file' and keeps its extension."""
        unknown_info = get_file_info('file.xyz', 1024)
        assert unknown_info['category'] == 'file'
        assert unknown_info['extension'] == '.xyz'

    def test_no_extension(self):
        """A filename without a dot reports an empty extension."""
        bare_info = get_file_info('README', 1024)
        assert bare_info['extension'] == ''
|
||||
Reference in New Issue
Block a user