Some checks failed
CI - SharePoint Plugin with SonarQube / Test and SonarQube Analysis (push) Has been cancelled
146 lines
5.3 KiB
Python
146 lines
5.3 KiB
Python
"""
|
|
Unit tests for document_parser.py
|
|
"""
|
|
import pytest
|
|
from document_parser import DocumentParser, get_file_info
|
|
|
|
|
|
class TestDocumentParser:
    """Checks for DocumentParser: type detection, extension lookup, parsing."""

    def setup_method(self):
        """Create the DocumentParser instance the test methods use."""
        self.parser = DocumentParser()

    def test_can_parse_supported_extensions(self):
        """can_parse must be truthy for every file name listed here."""
        accepted = (
            'document.txt', 'readme.md', 'data.csv', 'config.json',
            'report.pdf', 'document.docx', 'spreadsheet.xlsx', 'slides.pptx',
            'script.py', 'code.java', 'style.css', 'index.html',
        )
        for filename in accepted:
            assert self.parser.can_parse(filename), f"Should parse {filename}"

    def test_can_parse_unsupported_extensions(self):
        """can_parse must be falsy for every file name listed here."""
        rejected = (
            'image.png', 'video.mp4', 'audio.mp3', 'archive.zip',
            'binary.exe', 'document.doc',
        )
        for filename in rejected:
            assert not self.parser.can_parse(filename), f"Should not parse {filename}"

    def test_get_extension(self):
        """_get_extension returns the lower-cased final suffix, or '' if none."""
        expectations = (
            ('file.txt', '.txt'),
            ('FILE.TXT', '.txt'),        # upper-case input, lower-case result
            ('archive.tar.gz', '.gz'),   # only the last suffix is expected
            ('noextension', ''),
        )
        for name, suffix in expectations:
            assert self.parser._get_extension(name) == suffix

    def test_parse_text_utf8(self):
        """A UTF-8 encoded .txt payload comes back as the original string."""
        expected = "Hello World\nThis is a test"
        assert self.parser.parse(expected.encode('utf-8'), 'test.txt') == expected

    def test_parse_text_multiple_encodings(self):
        """_parse_text returns the same text for UTF-8 and Latin-1 encoded bytes."""
        content = "Test content"
        # NOTE(review): the payload is pure ASCII, so both codecs produce the
        # same bytes; this does not exercise a Latin-1-only character.
        for codec in ('utf-8', 'latin-1'):
            assert self.parser._parse_text(content.encode(codec)) == "Test content"

    def test_parse_unsupported_file_raises_error(self):
        """parse raises ValueError mentioning the unsupported type for .exe."""
        with pytest.raises(ValueError, match="Unsupported file type"):
            self.parser.parse(b"content", "file.exe")

    def test_parse_json(self):
        """Parsed JSON output contains both key/value pairs from the input."""
        raw = '{"key": "value", "number": 123}'.encode('utf-8')
        rendered = self.parser.parse(raw, 'data.json')
        for fragment in ('"key": "value"', '"number": 123'):
            assert fragment in rendered

    def test_parse_csv(self):
        """Parsed CSV output contains the cell values of the first data row."""
        raw = "name,age,city\nAlice,30,NYC\nBob,25,LA".encode('utf-8')
        rendered = self.parser.parse(raw, 'data.csv')
        for cell in ("Alice", "30", "NYC"):
            assert cell in rendered
|
|
|
|
|
|
class TestGetFileInfo:
    """Checks for get_file_info: category, extension and size fields."""

    def test_document_category(self):
        """A .pdf file is reported as category 'document' with its extension."""
        meta = get_file_info('report.pdf', 1024)
        assert meta['category'] == 'document'
        assert meta['extension'] == '.pdf'

    def test_spreadsheet_category(self):
        """A .xlsx file is reported as category 'spreadsheet' with its extension."""
        meta = get_file_info('data.xlsx', 2048)
        assert meta['category'] == 'spreadsheet'
        assert meta['extension'] == '.xlsx'

    def test_presentation_category(self):
        """A .pptx file is reported as category 'presentation'."""
        assert get_file_info('slides.pptx', 4096)['category'] == 'presentation'

    def test_code_category(self):
        """A .py file is reported as category 'code'."""
        assert get_file_info('script.py', 512)['category'] == 'code'

    def test_image_category(self):
        """A .jpg file is reported as category 'image'."""
        assert get_file_info('photo.jpg', 8192)['category'] == 'image'

    def test_file_size_bytes(self):
        """512 bytes is formatted as '512 B' and echoed back in size_bytes."""
        meta = get_file_info('small.txt', 512)
        assert meta['size_formatted'] == '512 B'
        assert meta['size_bytes'] == 512

    def test_file_size_kilobytes(self):
        """2048 bytes is formatted with a KB unit and echoed back in size_bytes."""
        meta = get_file_info('medium.txt', 2048)
        assert 'KB' in meta['size_formatted']
        assert meta['size_bytes'] == 2048

    def test_file_size_megabytes(self):
        """Five mebibytes is formatted with an MB unit and the value 5.0."""
        formatted = get_file_info('large.pdf', 5 * 1024 ** 2)['size_formatted']
        assert 'MB' in formatted
        assert '5.0' in formatted

    def test_file_size_gigabytes(self):
        """Two gibibytes is formatted with a GB unit and the value 2.0."""
        formatted = get_file_info('huge.zip', 2 * 1024 ** 3)['size_formatted']
        assert 'GB' in formatted
        assert '2.0' in formatted

    def test_unknown_extension(self):
        """An unrecognised extension gets category 'file' and keeps its suffix."""
        meta = get_file_info('file.xyz', 1024)
        assert meta['category'] == 'file'
        assert meta['extension'] == '.xyz'

    def test_no_extension(self):
        """A name without a dot yields an empty extension string."""
        assert get_file_info('README', 1024)['extension'] == ''
|