ȨÀ¸·Î
| ¹«·áȸ¿ø°¡ÀÔ | ¾ÆÀ̵ð/ºñ¹øÃ£±â
ÃßõÀ½¾Ç¹æ¼Û
파이썬에서 유니코드 스트림 다루기
6³â Àü
파이썬에서 유니코드를 다룰 때는 일반적으로 str.decode()와 unicode.encode() 메서드를 사용하여 unicode 타입과 str 타입을 상호 변환한다.

아래 예시에서는 'utf-16'으로 작성된 파일을 열어, 수직 탭(vertical tab) 코드포인트를 지운 다음, 'utf-8'로 저장한다. (깨진 XML을 다룰 때 이 방식이 매우 중요하다.)

# Simple approach: load the entire file into memory, transform it, write it back.

# Read the raw bytes of the input file in one go.
with open("input.txt", "rb") as source:
    raw_bytes = source.read()

# Decode the UTF-16 bytes to text and drop every vertical tab (U+000B).
text = raw_bytes.decode("utf-16")
cleaned = text.replace(u"\u000B", u"")

# Re-encode the cleaned text as UTF-8 and save it.
with open("output.txt", "wb") as sink:
    sink.write(cleaned.encode("utf-8"))
엄청나게 큰 파일을 다룰 때가 아니라면 이 정도로도 충분하다. 하지만 큰 파일을 다룰 땐 모든 데이터가 메모리에 올라간다는 사실이 문제가 된다.

스트리밍 인코더/디코더 사용하기
파이썬 기본 라이브러리에는 codecs 모듈이 포함되어 있다. 이 모듈을 사용하면 파일을 조금씩 읽을 수 있고, 메모리에도 약간의 유니코드 데이터만 올라가게 된다.

codecs.open() 헬퍼 메서드를 사용하여 위의 예시를 최소한만 고쳐보자.

import codecs

# Open the input and output streams inside the ``with`` statement itself, so
# the input file is closed even if opening the output file fails. Local names
# avoid rebinding the built-in ``input``.
with codecs.open("input.txt", "rb", encoding="utf-16") as source, \
        codecs.open("output.txt", "wb", encoding="utf-8") as sink:
    # Stream the text one chunk at a time so that only a small piece of
    # unicode data is held in memory.
    while True:
        # Read the next decoded chunk; an empty result means end of file.
        chunk = source.read(4096)
        if not chunk:
            break
        # Remove vertical tabs (U+000B), then write the chunk; the stream
        # writer encodes it as UTF-8.
        sink.write(chunk.replace(u"\u000B", u""))
파일은 끔찍해! 이터레이터 사용하기
파일은 다루기가 좀 지루하다. 복잡한 처리 과정에는 유니코드 데이터의 이터레이터를 다루는 편이 깔끔할 것이다.

아래는 iterdecode()를 사용하여, 파일을 유니코드 데이터 조각의 이터레이터로 읽는 효과적인 방법이다.

from functools import partial  
from codecs import iterdecode

def iter_unicode_chunks(path, encoding, chunk_size=4096):
    """Yield the contents of the file at *path* as decoded unicode chunks.

    The file is read in binary mode, *chunk_size* bytes at a time, and fed
    through an incremental decoder, so a code point whose bytes straddle two
    reads is still decoded correctly.

    Args:
        path: Path of the file to read.
        encoding: Name of the codec used to decode the bytes (e.g. "utf-16").
        chunk_size: Number of bytes requested per read. Defaults to 4096.

    Yields:
        Non-empty unicode strings which, concatenated, form the decoded file.
    """
    # Open the file to read.
    with open(path, "rb") as source:
        # iter(callable, sentinel) keeps calling source.read(chunk_size) until
        # it returns the sentinel. A binary read signals EOF with b"", so the
        # sentinel must be bytes: on Python 3 a str "" never equals b"" and
        # the loop would never terminate.
        binary_chunks = iter(partial(source.read, chunk_size), b"")
        # Convert the binary chunks into unicode chunks.
        for unicode_chunk in iterdecode(binary_chunks, encoding):
            yield unicode_chunk
이제 iterencode() 메서드를 사용하여, 유니코드 조각의 이터레이터를 파일에 써보자.

from codecs import iterencode

def write_unicode_chunks(path, unicode_chunks, encoding):
    """Write an iterable of unicode chunks to the file at *path*.

    Args:
        path: Path of the file to write; it is opened in binary write mode.
        unicode_chunks: Iterable of unicode strings to write, in order.
        encoding: Name of the codec used to encode the chunks (e.g. "utf-8").
    """
    # Incrementally encode the unicode chunks into binary chunks.
    encoded_chunks = iterencode(unicode_chunks, encoding)
    # Open the destination file and write the binary chunks one after another.
    with open(path, "wb") as sink:
        sink.writelines(encoded_chunks)
이 두 함수와 함께라면 유니코드 데이터의 스트림에서 수직 탭을 없애는 일이 마법같이 끝난다(just becomes a case of plumbing everything together).

# Lazily read the UTF-16 input file as an iterator of unicode chunks.
decoded_chunks = iter_unicode_chunks("input.txt", encoding="utf-16")

# Strip vertical tabs (U+000B) from each chunk. This is a generator expression,
# so nothing is read until the writer starts consuming it.
cleaned_chunks = (piece.replace(u"\u000B", u"") for piece in decoded_chunks)

# Encode the cleaned chunks as UTF-8 and save them to the output file.
write_unicode_chunks("output.txt", cleaned_chunks, encoding="utf-8")
거창하게 codecs 모듈을 사용해야 할까?
얼핏 그냥, str.decode()와 unicode.encode() 메서드를 사용하여 큰 file 객체를 바이너리 조각으로 읽고, 인코딩하고 디코딩하는 편이 간단하다고 생각할 수도 있겠다.

# Bad example. Do not do this!
# Kept deliberately broken: it illustrates why decoding raw byte chunks
# independently is unsafe.

# Open the input and output streams.
with open("input.txt", "rb") as input, open("output.txt", "wb") as output:
    # Iterate over the raw binary chunks.
    while True:
        # Read one chunk of bytes.
        chunk = input.read(4096)
        if not chunk:
            break
        # DANGER: decode this binary chunk as utf-16 on its own. A code point
        # encoded as more than one byte can be cut in two by the chunk
        # boundary, in which case decode() may raise UnicodeDecodeError.
        chunk = chunk.decode("utf-16")
        # Remove vertical tabs.
        chunk = chunk.replace(u"\u000B", u"")
        # Encode the unicode data as utf-8.
        chunk = chunk.encode("utf-8")
        # Write the chunk.
        output.write(chunk)
불행히도 몇몇 유니코드 코드포인트는 바이너리 데이터에서 여러 바이트로 인코딩된다. 따라서 단순히 파일에서 바이트 조각들을 읽어 decode() 메서드를 적용하면 예기치 않게 UnicodeDecodeError가 발생할 수도 있다. 이는 여러 바이트로 이루어진 코드포인트 하나가 조각의 경계에서 둘로 나뉘었기 때문이다.

codecs 모듈의 도구들을 사용하면 이러한 예기치 않은 충돌을 예방할 수 있다.

파이썬 3에서는?
파이썬 3에서는 훨씬 단순하게 유니코드 파일을 다룰 수 있다. 빌트인 메서드인 open()은 유니코드 데이터를 수정하거나 인코딩을 변경하는 데 필요한 기능을 포함하고 있다.

# Open the input and output streams inside the ``with`` statement itself, so
# the input file is closed even if opening the output file fails. Local names
# avoid rebinding the built-in ``input``.
with open("input.txt", "rt", encoding="utf-16") as source, \
        open("output.txt", "wt", encoding="utf-8") as sink:
    # Stream the text one chunk at a time so that only a small piece of
    # unicode data is held in memory.
    while True:
        # Read the next decoded chunk; an empty string means end of file.
        chunk = source.read(4096)
        if not chunk:
            break
        # Remove vertical tabs (U+000B), then write the chunk; the text-mode
        # file encodes it as UTF-8.
        sink.write(chunk.replace("\u000B", ""))
파이썬 3의 시대다! 즐겁게 코딩하길!
ÃßõÃßõ : 284 Ãßõ ¸ñ·Ï
¹øÈ£ Á¦¸ñ
3,046
 Vimeo (ºñ¸Þ¿À) API ¸¦ »ç¿ëÇÏ¿© Ç÷¹À̾î ÄÁÆ®·ÑÇϱâ
3,045
 iframe »ç¿ë½Ã ÇÏ´Ü¿¡ ¹ß»ýÇÏ´Â °ø¹é Á¦°Å¹æ¹ý
3,044
 ¾ÆÀÌÇÁ·¹ÀÓ(iframe) Àüüȭ¸é °¡´ÉÇÏ°Ô Çϱâ
3,043
 부트스트랩(bootstrap)에서 사용하는 class명 정리
3,042
 ºÎÆ®½ºÆ®·¦ CSS
3,041
 Å©·Ò¿¡¼­ ¸¶Áø Á¶Àý
3,040
 PHP ÇöÀç ÆäÀÌÁöÀÇ µµ¸ÞÀθíÀ̳ª urlµîÀÇ Á¤º¸ ¾Ë¾Æ¿À±â
3,039
 PHP preg match all()
3,038
 PHP ·Î À¥ÆäÀÌÁö ±Ü¾î¿À±â ¸ðµç ¹æ¹ý ÃÑÁ¤¸®!
3,037
 [PHP] ¿ø°ÝÁö ÆÄÀÏ ÁÖ¼Ò ³ëÃâ ¾ÈÇϰí curl·Î ´Ù¿î·Îµå ¹Þ±â
3,036
 PHP ÇÔ¼ö Á¤¸®
3,035
 ¾ÆÀÌÇÁ·¹ÀÓ(iframe) ºñÀ² À¯ÁöÇϸ鼭 Å©±â Á¶ÀýÇÏ´Â ¹æ¹ý
3,034
 PHP ¹è¿­¿¡¼­ ¹«ÀÛÀ§·Î Çϳª »Ì¾ÆÁÖ´Â array rand() ÇÔ¼ö
3,033
 PHP Á¤±Ô½Ä Á¤¸®
3,032
 PHP Á¤±Ô½ÄÀ» Ȱ¿ëÇÑ ÅÂ±× ¹× ƯÁ¤ ¹®ÀÚ¿­ Á¦°Å ¹× ÃßÃâ ¹æ¹ý
3,031
 php Å©·Ñ¸µ ¶Ç´Â ÆÄ½Ì ÇÔ¼ö, Á¤±Ô½Ä ¸ðÀ½
3,030
 Á¦ÀÌÄõ¸® ±âº» ¸í·É¾î
3,029
 À¥ÆäÀÌÁö °¡·Î ¸ðµå¼¼·Î ¸ðµå ÀνÄÇϱâ
3,028
 ¸ð¹ÙÀÏ À¥ È­¸é °­Á¦ ȸÀü(°¡·Î¸ðµå °íÁ¤)
3,027
 [HTML5]¿¡¼­ frameset ´ëü ¹æ¹ý°ú iframe ¼Ó¼º
3,026
 HTML <Audio> »ç¿ë¹ý
3,025
 À©µµ¿ì10 ½Ã½ºÅÛÆÄÀÏ ¼Õ»ó (Ãʰ£´Ü ¿À·ù º¹±¸¹æ¹ý!!)
3,024
 PHP ÆÄÀÏ Á¸Àç ¿©ºÎ ÆÄ¾ÇÇϱâ(·ÎÄà ÆÄÀÏ Á¸Àç ¹× ¿ø°ÝÁö ÆÄÀÏ Á¸Àç)
3,023
 [CSS] ¹Ú½º ¼¼·Î °¡¿îµ¥ Áß¾Ó Á¤·Ä 6°¡Áö
3,022
 CSS Layout ¼öÆò & ¼öÁ÷ Á¤·Ä
¸ñ·Ï
¹ÂÁ÷Æ®·ÎÆ® ºÎ»ê±¤¿ª½Ã ºÎ»êÁø±¸ °¡¾ßµ¿ ¤Ó °³ÀÎÁ¤º¸Ãë±Þ¹æÄ§
Copyright (C) musictrot All rights reserved.