von DPunch » Do, 25.10.2012 12:25
Servus
Es gibt wohl direkt über die API eine Möglichkeit, den MIME auszulesen (
OOo Wiki -> Property MediaType), aber mir ist das nicht gelungen.
Stattdessen habe ich auf die Alternative, die über das Auslesen der sogenannten "Magic number", also den Anfangsbytes der Datei, geht, zurückgegriffen:
Code: Alles auswählen
'Defines how far to read from the start of the file, for performance this can be adjusted to the longest known byte signature length
Private Const nBytesToRead as Integer = 22
'Collection that holds the filetypes
Private cHexSignatures
Sub Main
sURL = "D:\meinBild.JPG"
MsgBox GetFileType(sURL,True)
End Sub
Function GetFileType(sURL as String, bReturnSignatureIfUnknown as Boolean)
On Error GoTo ErrorOccured
If NOT FileExists(sURL) Then
GetFileType = "File not found: " & sURL
Exit Function
End If
sURL = ConvertToURL(sURL)
oFileAccess = CreateUNOService("com.sun.star.ucb.SimpleFileAccess")
oInpDataStream = CreateUNOService("com.sun.star.io.DataInputStream")
Dim aByte() as Byte
oInpDataStream.setInputStream(oFileAccess.openFileRead(sUrl))
oInpDataStream.readBytes(aByte,nBytesToRead)
nUpper = UBound(aByte)
If nUpper = -1 Then GoTo ZeroLengthFile
Dim aHexArray(nUpper) as String
For i = 0 To nUpper
nByte = aByte(i)
'BYTE is signed in Basic so a shift to unsigned is needed
If nByte < 0 Then nByte = 256 + nByte
'Add to HexArray with a leading zero if needed
aHexArray(i) = Right("0" & HEX(nByte),2)
Next i
'If not yet done, add known signatures to collection
If isEmpty(cHexSignatures) Then FillCollection
sTranslated = GetTranslatedSignature(aHexArray)
'Empty string means no match found
If sTranslated = "" Then
If bReturnSignatureIfUnknown = True Then
GetFileType = Join(aHexArray," ")
Else
GetFileType = "Unknown signature"
End If
Else
GetFileType = sTranslated
End If
On Error Resume Next
Finally:
oInpDataStream.closeInput
Exit Function
ZeroLengthFile:
GetFileType = "Zero length file"
GoTo Finally
ErrorOccured:
GetFileType = "An error occured"
GoTo Finally
End Function
Function GetTranslatedSignature(ByVal aHexSignature) as String
On Error Resume Next
For i = UBound(aHexSignature) To 0 Step -1
'Check for hits from longest to shortest signature, cut off last entry after each loop
ReDim Preserve aHexSignature(i)
s = ""
s = cHexSignatures.Item(Join(aHexSignature," "))
If s <> "" Then Exit For
Next i
If i >= 0 Then
GetTranslatedSignature = s
Else
GetTranslatedSignature = ""
End If
End Function
Sub FillCollection
cHexSignatures = CreateObject("Collection")
'Add items using .add("Filetype to display","Hex-Signature separated by spaces")
cHexSignatures.add("PDF","25 50 44 46")
cHexSignatures.add("JPEG IMAGE","FF D8 FF E0")
cHexSignatures.add("JPEG IMAGE (EXIF)","FF D8 FF E1")
cHexSignatures.add("JPEG IMAGE (SPIFF)","FF D8 FF E8")
cHexSignatures.add("DVD video file","00 00 01 BA")
cHexSignatures.add("MPEG video file","00 00 01 B3")
cHexSignatures.add("Windows Media Video/Audio","30 26 B2 75 8E 66 CF 11")
cHexSignatures.add("QuickTime movie file","6D 6F 6F 76")
cHexSignatures.add("Flash movie file","46 4C 56")
cHexSignatures.add("WinRAR compressed archive","52 61 72 21 1A 07 00")
cHexSignatures.add("MP3 audio file","49 44 33")
cHexSignatures.add("AVI container file","52 49 46 46")
cHexSignatures.add("Open Document file","50 4B 03 04 14 00 00 08")
cHexSignatures.add("OXT file","50 4B 03 04 14 00 00 08 08")
cHexSignatures.add("Windows/Dos executable file","4D 5A")
cHexSignatures.add(">>Possible<< text (XML,HTML) file","3C")
End Sub
Für die Hex-Signaturen gab es leider anscheinend keine verbindliche Liste, deshalb habe ich nur ein paar ausgewählte Formate eingepflegt.
Bei Bedarf kann die Liste auf Basis von z.B.
File Signature Database,
Wikipedia: List of file signatures,
File signatures table oder eben einfache Google-Suche in der Prozedur "FillCollection" erweitert werden.
Servus
Es gibt wohl direkt über die API eine Möglichkeit, den MIME auszulesen ([url=http://wiki.openoffice.org/wiki/Documentation/DevGuide/UCB/Services_and_Interfaces]OOo Wiki[/url] -> Property MediaType), aber mir ist das nicht gelungen.
Stattdessen habe ich auf die Alternative, die über das Auslesen der sogenannten "Magic number", also den Anfangsbytes der Datei, geht, zurückgegriffen:
[code]'Defines how far to read from the start of the file, for performance this can be adjusted to the longest known byte signature length
Private Const nBytesToRead as Integer = 22
'Collection that holds the filetypes
Private cHexSignatures
Sub Main
sURL = "D:\meinBild.JPG"
MsgBox GetFileType(sURL,True)
End Sub
Function GetFileType(sURL as String, bReturnSignatureIfUnknown as Boolean)
On Error GoTo ErrorOccured
If NOT FileExists(sURL) Then
GetFileType = "File not found: " & sURL
Exit Function
End If
sURL = ConvertToURL(sURL)
oFileAccess = CreateUNOService("com.sun.star.ucb.SimpleFileAccess")
oInpDataStream = CreateUNOService("com.sun.star.io.DataInputStream")
Dim aByte() as Byte
oInpDataStream.setInputStream(oFileAccess.openFileRead(sUrl))
oInpDataStream.readBytes(aByte,nBytesToRead)
nUpper = UBound(aByte)
If nUpper = -1 Then GoTo ZeroLengthFile
Dim aHexArray(nUpper) as String
For i = 0 To nUpper
nByte = aByte(i)
'BYTE is signed in Basic so a shift to unsigned is needed
If nByte < 0 Then nByte = 256 + nByte
'Add to HexArray with a leading zero if needed
aHexArray(i) = Right("0" & HEX(nByte),2)
Next i
'If not yet done, add known signatures to collection
If isEmpty(cHexSignatures) Then FillCollection
sTranslated = GetTranslatedSignature(aHexArray)
'Empty string means no match found
If sTranslated = "" Then
If bReturnSignatureIfUnknown = True Then
GetFileType = Join(aHexArray," ")
Else
GetFileType = "Unknown signature"
End If
Else
GetFileType = sTranslated
End If
On Error Resume Next
Finally:
oInpDataStream.closeInput
Exit Function
ZeroLengthFile:
GetFileType = "Zero length file"
GoTo Finally
ErrorOccured:
GetFileType = "An error occured"
GoTo Finally
End Function
Function GetTranslatedSignature(ByVal aHexSignature) as String
On Error Resume Next
For i = UBound(aHexSignature) To 0 Step -1
'Check for hits from longest to shortest signature, cut off last entry after each loop
ReDim Preserve aHexSignature(i)
s = ""
s = cHexSignatures.Item(Join(aHexSignature," "))
If s <> "" Then Exit For
Next i
If i >= 0 Then
GetTranslatedSignature = s
Else
GetTranslatedSignature = ""
End If
End Function
Sub FillCollection
cHexSignatures = CreateObject("Collection")
'Add items using .add("Filetype to display","Hex-Signature separated by spaces")
cHexSignatures.add("PDF","25 50 44 46")
cHexSignatures.add("JPEG IMAGE","FF D8 FF E0")
cHexSignatures.add("JPEG IMAGE (EXIF)","FF D8 FF E1")
cHexSignatures.add("JPEG IMAGE (SPIFF)","FF D8 FF E8")
cHexSignatures.add("DVD video file","00 00 01 BA")
cHexSignatures.add("MPEG video file","00 00 01 B3")
cHexSignatures.add("Windows Media Video/Audio","30 26 B2 75 8E 66 CF 11")
cHexSignatures.add("QuickTime movie file","6D 6F 6F 76")
cHexSignatures.add("Flash movie file","46 4C 56")
cHexSignatures.add("WinRAR compressed archive","52 61 72 21 1A 07 00")
cHexSignatures.add("MP3 audio file","49 44 33")
cHexSignatures.add("AVI container file","52 49 46 46")
cHexSignatures.add("Open Document file","50 4B 03 04 14 00 00 08")
cHexSignatures.add("OXT file","50 4B 03 04 14 00 00 08 08")
cHexSignatures.add("Windows/Dos executable file","4D 5A")
cHexSignatures.add(">>Possible<< text (XML,HTML) file","3C")
End Sub[/code]
Für die Hex-Signaturen gab es leider anscheinend keine verbindliche Liste, deshalb habe ich nur ein paar ausgewählte Formate eingepflegt.
Bei Bedarf kann die Liste auf Basis von z.B. [url=http://www.filesignatures.net/]File Signature Database[/url], [url=http://en.wikipedia.org/wiki/List_of_file_signatures]Wikipedia: List of file signatures[/url], [url=http://wangrui.wordpress.com/2007/06/19/file-signatures-table/]File signatures table[/url] oder eben einfache Google-Suche in der Prozedur "FillCollection" erweitert werden.